camenduru commited on
Commit
b4e342b
·
1 Parent(s): 599b29e

thanks to shubham-goel ❤

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +7 -0
  2. slahmr/.gitignore +140 -0
  3. slahmr/.gitmodules +6 -0
  4. slahmr/LICENSE +21 -0
  5. slahmr/README.md +167 -0
  6. slahmr/download_models.sh +5 -0
  7. slahmr/env.yaml +45 -0
  8. slahmr/env_build.yaml +127 -0
  9. slahmr/install.sh +35 -0
  10. slahmr/requirements.txt +27 -0
  11. slahmr/setup.py +9 -0
  12. slahmr/slahmr.zip +3 -0
  13. slahmr/slahmr/__init__.py +0 -0
  14. slahmr/slahmr/body_model/__init__.py +3 -0
  15. slahmr/slahmr/body_model/body_model.py +142 -0
  16. slahmr/slahmr/body_model/specs.py +554 -0
  17. slahmr/slahmr/body_model/utils.py +56 -0
  18. slahmr/slahmr/confs/config.yaml +51 -0
  19. slahmr/slahmr/confs/data/3dpw.yaml +18 -0
  20. slahmr/slahmr/confs/data/3dpw_gt.yaml +18 -0
  21. slahmr/slahmr/confs/data/custom.yaml +17 -0
  22. slahmr/slahmr/confs/data/davis.yaml +16 -0
  23. slahmr/slahmr/confs/data/egobody.yaml +18 -0
  24. slahmr/slahmr/confs/data/posetrack.yaml +17 -0
  25. slahmr/slahmr/confs/data/video.yaml +24 -0
  26. slahmr/slahmr/confs/init.yaml +13 -0
  27. slahmr/slahmr/confs/optim.yaml +51 -0
  28. slahmr/slahmr/data/__init__.py +2 -0
  29. slahmr/slahmr/data/dataset.py +438 -0
  30. slahmr/slahmr/data/tools.py +108 -0
  31. slahmr/slahmr/data/vidproc.py +82 -0
  32. slahmr/slahmr/eval/__init__.py +0 -0
  33. slahmr/slahmr/eval/associate.py +161 -0
  34. slahmr/slahmr/eval/egobody_utils.py +171 -0
  35. slahmr/slahmr/eval/run_eval.py +289 -0
  36. slahmr/slahmr/eval/split_3dpw.py +99 -0
  37. slahmr/slahmr/eval/split_egobody.py +123 -0
  38. slahmr/slahmr/eval/tools.py +181 -0
  39. slahmr/slahmr/geometry/__init__.py +5 -0
  40. slahmr/slahmr/geometry/camera.py +348 -0
  41. slahmr/slahmr/geometry/mesh.py +110 -0
  42. slahmr/slahmr/geometry/pcl.py +60 -0
  43. slahmr/slahmr/geometry/plane.py +101 -0
  44. slahmr/slahmr/geometry/rotation.py +284 -0
  45. slahmr/slahmr/humor/__init__.py +0 -0
  46. slahmr/slahmr/humor/amass_utils.py +148 -0
  47. slahmr/slahmr/humor/humor_model.py +1655 -0
  48. slahmr/slahmr/humor/transforms.py +472 -0
  49. slahmr/slahmr/job_specs/3dpw_test_split.txt +248 -0
  50. slahmr/slahmr/job_specs/davis.txt +24 -0
.gitattributes CHANGED
@@ -34,3 +34,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  lietorch-0.2-py3.10-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  lietorch-0.2-py3.10-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text
37
+ slahmr/teaser.png filter=lfs diff=lfs merge=lfs -text
38
+ slahmr/third-party/DROID-SLAM/build/lib.linux-x86_64-3.10/droid_backends.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
39
+ slahmr/third-party/DROID-SLAM/build/lib.linux-x86_64-3.10/lietorch_backends.cpython-310-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
40
+ slahmr/third-party/DROID-SLAM/build/temp.linux-x86_64-3.10/src/droid_kernels.o filter=lfs diff=lfs merge=lfs -text
41
+ slahmr/third-party/DROID-SLAM/build/temp.linux-x86_64-3.10/thirdparty/lietorch/lietorch/src/lietorch_gpu.o filter=lfs diff=lfs merge=lfs -text
42
+ slahmr/third-party/DROID-SLAM/thirdparty/lietorch/examples/registration/assets/registration.gif filter=lfs diff=lfs merge=lfs -text
43
+ slahmr/third-party/ViTPose/demo/resources/demo_coco.gif filter=lfs diff=lfs merge=lfs -text
slahmr/.gitignore ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # data
7
+ *outputs*
8
+ *renders*
9
+ *cache*
10
+ *checkpoints*
11
+ *_DATA
12
+
13
+ *.swp
14
+ *.out
15
+ *.err
16
+
17
+ # C extensions
18
+ *.so
19
+
20
+ # Distribution / packaging
21
+ .Python
22
+ build/
23
+ develop-eggs/
24
+ dist/
25
+ downloads/
26
+ eggs/
27
+ .eggs/
28
+ lib/
29
+ lib64/
30
+ parts/
31
+ sdist/
32
+ var/
33
+ wheels/
34
+ pip-wheel-metadata/
35
+ share/python-wheels/
36
+ *.egg-info/
37
+ .installed.cfg
38
+ *.egg
39
+ MANIFEST
40
+
41
+ # PyInstaller
42
+ # Usually these files are written by a python script from a template
43
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
44
+ *.manifest
45
+ *.spec
46
+
47
+ # Installer logs
48
+ pip-log.txt
49
+ pip-delete-this-directory.txt
50
+
51
+ # Unit test / coverage reports
52
+ htmlcov/
53
+ .tox/
54
+ .nox/
55
+ .coverage
56
+ .coverage.*
57
+ .cache
58
+ nosetests.xml
59
+ coverage.xml
60
+ *.cover
61
+ *.py,cover
62
+ .hypothesis/
63
+ .pytest_cache/
64
+
65
+ # Translations
66
+ *.mo
67
+ *.pot
68
+
69
+ # Django stuff:
70
+ *.log
71
+ local_settings.py
72
+ db.sqlite3
73
+ db.sqlite3-journal
74
+
75
+ # Flask stuff:
76
+ instance/
77
+ .webassets-cache
78
+
79
+ # Scrapy stuff:
80
+ .scrapy
81
+
82
+ # Sphinx documentation
83
+ docs/_build/
84
+
85
+ # PyBuilder
86
+ target/
87
+
88
+ # Jupyter Notebook
89
+ .ipynb_checkpoints
90
+
91
+ # IPython
92
+ profile_default/
93
+ ipython_config.py
94
+
95
+ # pyenv
96
+ .python-version
97
+
98
+ # pipenv
99
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
101
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
102
+ # install all needed dependencies.
103
+ #Pipfile.lock
104
+
105
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
106
+ __pypackages__/
107
+
108
+ # Celery stuff
109
+ celerybeat-schedule
110
+ celerybeat.pid
111
+
112
+ # SageMath parsed files
113
+ *.sage.py
114
+
115
+ # Environments
116
+ .env
117
+ .venv
118
+ env/
119
+ venv/
120
+ ENV/
121
+ env.bak/
122
+ venv.bak/
123
+
124
+ # Spyder project settings
125
+ .spyderproject
126
+ .spyproject
127
+
128
+ # Rope project settings
129
+ .ropeproject
130
+
131
+ # mkdocs documentation
132
+ /site
133
+
134
+ # mypy
135
+ .mypy_cache/
136
+ .dmypy.json
137
+ dmypy.json
138
+
139
+ # Pyre type checker
140
+ .pyre/
slahmr/.gitmodules ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [submodule "third-party/DROID-SLAM"]
2
+ path = third-party/DROID-SLAM
3
+ url = https://github.com/princeton-vl/DROID-SLAM.git
4
+ [submodule "third-party/ViTPose"]
5
+ path = third-party/ViTPose
6
+ url = https://github.com/ViTAE-Transformer/ViTPose.git
slahmr/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 vye16
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
slahmr/README.md ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Decoupling Human and Camera Motion from Videos in the Wild
2
+
3
+ Official PyTorch implementation of the paper Decoupling Human and Camera Motion from Videos in the Wild
4
+
5
+ [Project page](https://vye16.github.io/slahmr/) | [ArXiv](https://arxiv.org/abs/2302.12827)
6
+
7
+ <img src="./teaser.png">
8
+
9
+ ## [<img src="https://i.imgur.com/QCojoJk.png" width="40"> You can run SLAHMR in Google Colab](https://colab.research.google.com/drive/1knzxW3XuxiaBH6hcwx01cs6DfA4azv5E?usp=sharing)
10
+
11
+ ## News
12
+
13
+ - [2023/07] We updated the code to support tracking from [4D Humans](https://shubham-goel.github.io/4dhumans/)! The original code remains in the `release` branch.
14
+ - [2023/02] Original release!
15
+
16
+ ## Getting started
17
+ This code was tested on Ubuntu 22.04 LTS and requires a CUDA-capable GPU.
18
+
19
+ 1. Clone repository and submodules
20
+ ```
21
+ git clone --recursive https://github.com/vye16/slahmr.git
22
+ ```
23
+ or initialize submodules if already cloned
24
+ ```
25
+ git submodule update --init --recursive
26
+ ```
27
+
28
+ 2. Set up conda environment. Run
29
+ ```
30
+ source install.sh
31
+ ```
32
+
33
+ <details>
34
+ <summary>We also include the following steps for trouble-shooting.</summary>
35
+
36
+ * Create environment
37
+ ```
38
+ conda env create -f env.yaml
39
+ conda activate slahmr
40
+ ```
41
+ We use PyTorch 1.13.0 with CUDA 11.7. Please modify according to your setup; we've tested successfully for PyTorch 1.11 as well.
42
+ We've also included `env_build.yaml` to speed up installation using already-solved dependencies, though it might not be compatible with your CUDA driver.
43
+
44
+ * Install PHALP
45
+ ```
46
+ pip install phalp[all]@git+https://github.com/brjathu/PHALP.git
47
+ ```
48
+
49
+ * Install current source repo
50
+ ```
51
+ pip install -e .
52
+ ```
53
+
54
+ * Install ViTPose
55
+ ```
56
+ pip install -v -e third-party/ViTPose
57
+ ```
58
+
59
+ * Install DROID-SLAM (will take a while)
60
+ ```
61
+ cd third-party/DROID-SLAM
62
+ python setup.py install
63
+ ```
64
+ </details>
65
+
66
+ 3. Download models from [here](https://drive.google.com/file/d/1GXAd-45GzGYNENKgQxFQ4PHrBp8wDRlW/view?usp=sharing). Run
67
+ ```
68
+ ./download_models.sh
69
+ ```
70
+ or
71
+ ```
72
+ gdown https://drive.google.com/uc?id=1GXAd-45GzGYNENKgQxFQ4PHrBp8wDRlW
73
+ unzip -q slahmr_dependencies.zip
74
+ rm slahmr_dependencies.zip
75
+ ```
76
+
77
+ All models and checkpoints should have been unpacked in `_DATA`.
78
+
79
+
80
+ ## Fitting to an RGB video:
81
+ For a custom video, you can edit the config file: `slahmr/confs/data/video.yaml`.
82
+ Then, from the `slahmr` directory, you can run:
83
+ ```
84
+ python run_opt.py data=video run_opt=True run_vis=True
85
+ ```
86
+
87
+ We use hydra to launch experiments, and all parameters can be found in `slahmr/confs/config.yaml`.
88
+ If you would like to update any aspect of logging or optimization tuning, update the relevant config files.
89
+
90
+ By default, we will log each run to `outputs/video-val/<DATE>/<VIDEO_NAME>`.
91
+ Each stage of optimization will produce a separate subdirectory, each of which will contain outputs saved throughout the optimization
92
+ and rendered videos of the final result for that stage of optimization.
93
+ The `motion_chunks` directory contains the outputs of the final stage of optimization,
94
+ `root_fit` and `smooth_fit` contain outputs of short, intermediate stages of optimization,
95
+ and `init` contains the initialized outputs before optimization.
96
+
97
+ We've provided a `run_vis.py` script for running visualization from logs after optimization.
98
+ From the `slahmr` directory, run
99
+ ```
100
+ python run_vis.py --log_root <LOG_ROOT>
101
+ ```
102
+ and it will visualize all log subdirectories in `<LOG_ROOT>`.
103
+ Each output npz file will contain the SMPL parameters for all optimized people, the camera intrinsics and extrinsics.
104
+ The `motion_chunks` output will contain additional predictions from the motion prior.
105
+ Please see `run_vis.py` for how to extract the people meshes from the output parameters.
106
+
107
+
108
+ ## Fitting to specific datasets:
109
+ We provide configurations for dataset formats in `slahmr/confs/data`:
110
+ 1. Posetrack in `slahmr/confs/data/posetrack.yaml`
111
+ 2. Egobody in `slahmr/confs/data/egobody.yaml`
112
+ 3. 3DPW in `slahmr/confs/data/3dpw.yaml`
113
+ 4. Custom video in `slahmr/confs/data/video.yaml`
114
+
115
+ **Please make sure to update all paths to data in the config files.**
116
+
117
+ We include tools to both process existing datasets we evaluated on in the paper, and to process custom data and videos.
118
+ We include experiments from the paper on the Egobody, Posetrack, and 3DPW datasets.
119
+
120
+ If you want to run on a large number of videos, or if you want to select specific people tracks for optimization,
121
+ we recommend preprocesing in advance.
122
+ For a single downloaded video, there is no need to run preprocessing in advance.
123
+
124
+ From the `slahmr/preproc` directory, run PHALP on all your sequences
125
+ ```
126
+ python launch_phalp.py --type <DATASET_TYPE> --root <DATASET_ROOT> --split <DATASET_SPLIT> --gpus <GPUS>
127
+ ```
128
+ and run DROID-SLAM on all your sequences
129
+ ```
130
+ python launch_slam.py --type <DATASET_TYPE> --root <DATASET_ROOT> --split <DATASET_SPLIT> --gpus <GPUS>
131
+ ```
132
+ You can also update the paths to datasets in `slahmr/preproc/datasets.py` for repeated use.
133
+
134
+ Then, from the `slahmr` directory,
135
+ ```
136
+ python run_opt.py data=<DATA_CFG> run_opt=True run_vis=True
137
+ ```
138
+
139
+ We've provided a helper script `launch.py` for launching many optimization jobs in parallel.
140
+ You can specify job-specific arguments with a job spec file, such as the example files in `job_specs`,
141
+ and batch-specific arguments shared across all jobs as
142
+ ```
143
+ python launch.py --gpus 1 2 -f job_specs/pt_val_shots.txt -s data=posetrack exp_name=posetrack_val
144
+ ```
145
+
146
+ ## Evaluation on 3D datasets
147
+ After launching and completing optimization on either the Egobody or 3DPW datasets,
148
+ you can evaluate the outputs with scripts in the `eval` directory.
149
+ Before running, please update `EGOBODY_ROOT` and `TDPW_ROOT` in `eval/tools.py`.
150
+ Then, run
151
+ ```
152
+ python run_eval.py -d <DSET_TYPE> -i <RES_ROOT> -f <JOB_FILE>
153
+ ```
154
+ where `<JOB_FILE>` is the same job file used to launch all optimization runs.
155
+
156
+
157
+ ## BibTeX
158
+
159
+ If you use our code in your research, please cite the following paper:
160
+ ```
161
+ @inproceedings{ye2023slahmr,
162
+ title={Decoupling Human and Camera Motion from Videos in the Wild},
163
+ author={Ye, Vickie and Pavlakos, Georgios and Malik, Jitendra and Kanazawa, Angjoo},
164
+ booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
165
+ month={June},
166
+ year={2023}
167
+ }
slahmr/download_models.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ #!/usr/bin/bash
2
+ # download models
3
+ gdown https://drive.google.com/uc?id=1GXAd-45GzGYNENKgQxFQ4PHrBp8wDRlW
4
+ unzip -q slahmr_dependencies.zip
5
+ rm slahmr_dependencies.zip
slahmr/env.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: slahmr
2
+ channels:
3
+ - conda-forge
4
+ - pytorch
5
+ - nvidia
6
+ - rusty1s
7
+ dependencies:
8
+ - python=3.9
9
+ - pytorch
10
+ - pytorch-cuda=11.7
11
+ - torchvision
12
+ - pytorch-scatter
13
+ - suitesparse
14
+ - pip
15
+ - pip:
16
+ - git+https://github.com/facebookresearch/detectron2.git
17
+ - git+https://github.com/brjathu/pytube.git
18
+ - git+https://github.com/nghorbani/configer
19
+ - setuptools==59.5.0
20
+ - torchgeometry==0.1.2
21
+ - tensorboard
22
+ - smplx
23
+ - pyrender
24
+ - open3d
25
+ - imageio-ffmpeg
26
+ - matplotlib
27
+ - opencv-python
28
+ - scipy
29
+ - scikit-image
30
+ - scikit-learn==0.22
31
+ - joblib
32
+ - cython
33
+ - tqdm
34
+ - hydra-core
35
+ - pyyaml
36
+ - chumpy
37
+ - gdown
38
+ - dill
39
+ - motmetrics
40
+ - scenedetect[opencv]
41
+ - einops
42
+ - mmcv==1.3.9
43
+ - timm==0.4.9
44
+ - xtcocotools==1.10
45
+ - pandas==1.4.0
slahmr/env_build.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: slahmr2
2
+ channels:
3
+ - defaults
4
+ dependencies:
5
+ - _libgcc_mutex=0.1=main
6
+ - _openmp_mutex=5.1=1_gnu
7
+ - bzip2=1.0.8=h7b6447c_0
8
+ - ca-certificates=2023.05.30=h06a4308_0
9
+ - ld_impl_linux-64=2.38=h1181459_1
10
+ - libffi=3.4.4=h6a678d5_0
11
+ - libgcc-ng=11.2.0=h1234567_1
12
+ - libgomp=11.2.0=h1234567_1
13
+ - libstdcxx-ng=11.2.0=h1234567_1
14
+ - libuuid=1.41.5=h5eee18b_0
15
+ - ncurses=6.4=h6a678d5_0
16
+ - openssl=3.0.9=h7f8727e_0
17
+ - python=3.10.12=h955ad1f_0
18
+ - readline=8.2=h5eee18b_0
19
+ - sqlite=3.41.2=h5eee18b_0
20
+ - tk=8.6.12=h1ccaba5_0
21
+ - tzdata=2023c=h04d1e81_0
22
+ - xz=5.4.2=h5eee18b_0
23
+ - zlib=1.2.13=h5eee18b_0
24
+ - pip:
25
+ - addict==2.4.0
26
+ - ansi2html==1.8.0
27
+ - appdirs==1.4.4
28
+ - asttokens==2.2.1
29
+ - attrs==23.1.0
30
+ - av==10.0.0
31
+ - backcall==0.2.0
32
+ - beautifulsoup4==4.12.2
33
+ - certifi==2022.12.7
34
+ - charset-normalizer==2.1.1
35
+ - click==8.1.4
36
+ - comm==0.1.3
37
+ - configargparse==1.5.5
38
+ - configer==1.4.1
39
+ - configparser==5.3.0
40
+ - contourpy==1.1.0
41
+ - cycler==0.11.0
42
+ - cython==0.29.36
43
+ - dash==2.11.1
44
+ - dash-core-components==2.0.0
45
+ - dash-html-components==2.0.0
46
+ - dash-table==5.0.0
47
+ - debugpy==1.6.7
48
+ - decorator==5.1.1
49
+ - droid-backends==0.0.0
50
+ - executing==1.2.0
51
+ - fastjsonschema==2.17.1
52
+ - flask==2.2.5
53
+ - fonttools==4.40.0
54
+ - gdown==4.7.1
55
+ - hmr2==0.0.0
56
+ - idna==3.4
57
+ - imageio-ffmpeg==0.4.8
58
+ - importlib-metadata==6.7.0
59
+ - ipdb==0.13.13
60
+ - ipykernel==6.24.0
61
+ - ipython==8.14.0
62
+ - ipywidgets==8.0.7
63
+ - itsdangerous==2.1.2
64
+ - jedi==0.18.2
65
+ - jinja2==3.1.2
66
+ - json-tricks==3.17.1
67
+ - jsonschema==4.18.0
68
+ - jsonschema-specifications==2023.6.1
69
+ - jupyter-client==8.3.0
70
+ - jupyter-core==5.3.1
71
+ - jupyterlab-widgets==3.0.8
72
+ - kiwisolver==1.4.4
73
+ - lietorch==0.2
74
+ - matplotlib==3.7.2
75
+ - matplotlib-inline==0.1.6
76
+ - mmcv==1.3.9
77
+ - munkres==1.1.4
78
+ - nbformat==5.7.0
79
+ - nest-asyncio==1.5.6
80
+ - oauthlib==3.2.2
81
+ - open3d==0.17.0
82
+ - pandas==1.4.0
83
+ - parso==0.8.3
84
+ - pexpect==4.8.0
85
+ - phalp==0.1.3
86
+ - pickleshare==0.7.5
87
+ - pillow==9.3.0
88
+ - pip==23.1.2
89
+ - plotly==5.15.0
90
+ - prompt-toolkit==3.0.39
91
+ - psutil==5.9.5
92
+ - ptyprocess==0.7.0
93
+ - pure-eval==0.2.2
94
+ - pyasn1==0.5.0
95
+ - pyasn1-modules==0.3.0
96
+ - pyparsing==3.0.9
97
+ - pyquaternion==0.9.9
98
+ - pysocks==1.7.1
99
+ - pytz==2023.3
100
+ - pyyaml==6.0
101
+ - pyzmq==25.1.0
102
+ - referencing==0.29.1
103
+ - requests==2.28.1
104
+ - retrying==1.3.4
105
+ - rpds-py==0.8.8
106
+ - scikit-learn==1.3.0
107
+ - scipy==1.11.1
108
+ - setuptools==59.5.0
109
+ - six==1.16.0
110
+ - soupsieve==2.4.1
111
+ - stack-data==0.6.2
112
+ - tenacity==8.2.2
113
+ - timm==0.4.9
114
+ - torch==1.13.0+cu117
115
+ - torch-scatter==2.1.1+pt113cu117
116
+ - torchgeometry==0.1.2
117
+ - torchvision==0.14.0+cu117
118
+ - tornado==6.3.2
119
+ - traitlets==5.9.0
120
+ - urllib3==1.26.13
121
+ - wcwidth==0.2.6
122
+ - werkzeug==2.2.3
123
+ - wheel==0.38.4
124
+ - widgetsnbextension==4.0.8
125
+ - xtcocotools==1.13
126
+ - yapf==0.40.1
127
+ - zipp==3.15.0
slahmr/install.sh ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -e
3
+
4
+ export CONDA_ENV_NAME=slahmr
5
+
6
+ conda create -n $CONDA_ENV_NAME python=3.10 -y
7
+
8
+ conda activate $CONDA_ENV_NAME
9
+
10
+ # install pytorch using pip, update with appropriate cuda drivers if necessary
11
+ pip install torch==1.13.0 torchvision==0.14.0 --index-url https://download.pytorch.org/whl/cu117
12
+ # uncomment if pip installation isn't working
13
+ # conda install pytorch=1.13.0 torchvision=0.14.0 pytorch-cuda=11.7 -c pytorch -c nvidia -y
14
+
15
+ # install pytorch scatter using pip, update with appropriate cuda drivers if necessary
16
+ pip install torch-scatter -f https://data.pyg.org/whl/torch-1.13.0+cu117.html
17
+ # uncomment if pip installation isn't working
18
+ # conda install pytorch-scatter -c pyg -y
19
+
20
+ # install PHALP
21
+ pip install phalp[all]@git+https://github.com/brjathu/PHALP.git
22
+
23
+ # install remaining requirements
24
+ pip install -r requirements.txt
25
+
26
+ # install source
27
+ pip install -e .
28
+
29
+ # install ViTPose
30
+ pip install -v -e third-party/ViTPose
31
+
32
+ # install DROID-SLAM
33
+ cd third-party/DROID-SLAM
34
+ python setup.py install
35
+ cd ../..
slahmr/requirements.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/nghorbani/configer
2
+ setuptools==59.5.0
3
+ torchgeometry==0.1.2
4
+ tensorboard
5
+ numpy==1.23
6
+ smplx
7
+ pyrender
8
+ open3d
9
+ imageio-ffmpeg
10
+ matplotlib
11
+ opencv-python
12
+ scipy
13
+ scikit-image
14
+ joblib
15
+ cython
16
+ tqdm
17
+ hydra-core
18
+ pyyaml
19
+ chumpy
20
+ gdown
21
+ dill
22
+ motmetrics
23
+ einops
24
+ mmcv==1.3.9
25
+ timm==0.4.9
26
+ xtcocotools
27
+ pandas==1.4.0
slahmr/setup.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import find_packages, setup
2
+
3
+ setup(
4
+ name="slahmr",
5
+ packages=find_packages(
6
+ where="slahmr",
7
+ ),
8
+ package_dir={"": "slahmr"},
9
+ )
slahmr/slahmr.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e59655cc88dc71f3a9ac20425310bf996da3b6f15619497d17c67e7b60a671a
3
+ size 5662244525
slahmr/slahmr/__init__.py ADDED
File without changes
slahmr/slahmr/body_model/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .body_model import *
2
+ from .specs import *
3
+ from .utils import *
slahmr/slahmr/body_model/body_model.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+
6
+ from smplx import SMPL, SMPLH, SMPLX
7
+ from smplx.vertex_ids import vertex_ids
8
+ from smplx.utils import Struct
9
+
10
+
11
+ class BodyModel(nn.Module):
12
+ """
13
+ Wrapper around SMPLX body model class.
14
+ """
15
+
16
+ def __init__(
17
+ self,
18
+ bm_path,
19
+ num_betas=10,
20
+ batch_size=1,
21
+ num_expressions=10,
22
+ use_vtx_selector=False,
23
+ model_type="smplh",
24
+ kid_template_path=None,
25
+ ):
26
+ super(BodyModel, self).__init__()
27
+ """
28
+ Creates the body model object at the given path.
29
+
30
+ :param bm_path: path to the body model pkl file
31
+ :param num_expressions: only for smplx
32
+ :param model_type: one of [smpl, smplh, smplx]
33
+ :param use_vtx_selector: if true, returns additional vertices as joints that correspond to OpenPose joints
34
+ """
35
+ self.use_vtx_selector = use_vtx_selector
36
+ cur_vertex_ids = None
37
+ if self.use_vtx_selector:
38
+ cur_vertex_ids = vertex_ids[model_type]
39
+ data_struct = None
40
+ if ".npz" in bm_path:
41
+ # smplx does not support .npz by default, so have to load in manually
42
+ smpl_dict = np.load(bm_path, encoding="latin1")
43
+ data_struct = Struct(**smpl_dict)
44
+ # print(smpl_dict.files)
45
+ if model_type == "smplh":
46
+ data_struct.hands_componentsl = np.zeros((0))
47
+ data_struct.hands_componentsr = np.zeros((0))
48
+ data_struct.hands_meanl = np.zeros((15 * 3))
49
+ data_struct.hands_meanr = np.zeros((15 * 3))
50
+ V, D, B = data_struct.shapedirs.shape
51
+ data_struct.shapedirs = np.concatenate(
52
+ [data_struct.shapedirs, np.zeros((V, D, SMPL.SHAPE_SPACE_DIM - B))],
53
+ axis=-1,
54
+ ) # super hacky way to let smplh use 16-size beta
55
+ kwargs = {
56
+ "model_type": model_type,
57
+ "data_struct": data_struct,
58
+ "num_betas": num_betas,
59
+ "batch_size": batch_size,
60
+ "num_expression_coeffs": num_expressions,
61
+ "vertex_ids": cur_vertex_ids,
62
+ "use_pca": False,
63
+ "flat_hand_mean": False,
64
+ }
65
+ if kid_template_path is not None:
66
+ kwargs["kid_template_path"] = kid_template_path
67
+ kwargs["age"] = "kid"
68
+
69
+ assert model_type in ["smpl", "smplh", "smplx"]
70
+ if model_type == "smpl":
71
+ self.bm = SMPL(bm_path, **kwargs)
72
+ self.num_joints = SMPL.NUM_JOINTS
73
+ elif model_type == "smplh":
74
+ self.bm = SMPLH(bm_path, **kwargs)
75
+ self.num_joints = SMPLH.NUM_JOINTS
76
+ elif model_type == "smplx":
77
+ self.bm = SMPLX(bm_path, **kwargs)
78
+ self.num_joints = SMPLX.NUM_JOINTS
79
+
80
+ self.model_type = model_type
81
+
82
+ def forward(
83
+ self,
84
+ root_orient=None,
85
+ pose_body=None,
86
+ pose_hand=None,
87
+ pose_jaw=None,
88
+ pose_eye=None,
89
+ betas=None,
90
+ trans=None,
91
+ dmpls=None,
92
+ expression=None,
93
+ return_dict=False,
94
+ **kwargs
95
+ ):
96
+ """
97
+ Note dmpls are not supported.
98
+ """
99
+ assert dmpls is None
100
+ out_obj = self.bm(
101
+ betas=betas,
102
+ global_orient=root_orient,
103
+ body_pose=pose_body,
104
+ left_hand_pose=None
105
+ if pose_hand is None
106
+ else pose_hand[:, : (SMPLH.NUM_HAND_JOINTS * 3)],
107
+ right_hand_pose=None
108
+ if pose_hand is None
109
+ else pose_hand[:, (SMPLH.NUM_HAND_JOINTS * 3) :],
110
+ transl=trans,
111
+ expression=expression,
112
+ jaw_pose=pose_jaw,
113
+ leye_pose=None if pose_eye is None else pose_eye[:, :3],
114
+ reye_pose=None if pose_eye is None else pose_eye[:, 3:],
115
+ return_full_pose=True,
116
+ **kwargs
117
+ )
118
+
119
+ out = {
120
+ "v": out_obj.vertices,
121
+ "f": self.bm.faces_tensor,
122
+ "betas": out_obj.betas,
123
+ "Jtr": out_obj.joints,
124
+ "pose_body": out_obj.body_pose,
125
+ "full_pose": out_obj.full_pose,
126
+ }
127
+ if self.model_type in ["smplh", "smplx"]:
128
+ out["pose_hand"] = torch.cat(
129
+ [out_obj.left_hand_pose, out_obj.right_hand_pose], dim=-1
130
+ )
131
+ if self.model_type == "smplx":
132
+ out["pose_jaw"] = out_obj.jaw_pose
133
+ out["pose_eye"] = pose_eye
134
+
135
+ if not self.use_vtx_selector:
136
+ # don't need extra joints
137
+ out["Jtr"] = out["Jtr"][:, : self.num_joints + 1] # add one for the root
138
+
139
+ if not return_dict:
140
+ out = Struct(**out)
141
+
142
+ return out
slahmr/slahmr/body_model/specs.py ADDED
@@ -0,0 +1,554 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ SMPL_JOINTS = {
4
+ "hips": 0,
5
+ "leftUpLeg": 1,
6
+ "rightUpLeg": 2,
7
+ "spine": 3,
8
+ "leftLeg": 4,
9
+ "rightLeg": 5,
10
+ "spine1": 6,
11
+ "leftFoot": 7,
12
+ "rightFoot": 8,
13
+ "spine2": 9,
14
+ "leftToeBase": 10,
15
+ "rightToeBase": 11,
16
+ "neck": 12,
17
+ "leftShoulder": 13,
18
+ "rightShoulder": 14,
19
+ "head": 15,
20
+ "leftArm": 16,
21
+ "rightArm": 17,
22
+ "leftForeArm": 18,
23
+ "rightForeArm": 19,
24
+ "leftHand": 20,
25
+ "rightHand": 21,
26
+ }
27
+ SMPL_PARENTS = [
28
+ -1,
29
+ 0,
30
+ 0,
31
+ 0,
32
+ 1,
33
+ 2,
34
+ 3,
35
+ 4,
36
+ 5,
37
+ 6,
38
+ 7,
39
+ 8,
40
+ 9,
41
+ 12,
42
+ 12,
43
+ 12,
44
+ 13,
45
+ 14,
46
+ 16,
47
+ 17,
48
+ 18,
49
+ 19,
50
+ ]
51
+
52
+ SMPLH_PATH = "./body_models/smplh"
53
+ SMPLX_PATH = "./body_models/smplx"
54
+ SMPL_PATH = "./body_models/smpl"
55
+ VPOSER_PATH = "./body_models/vposer_v1_0"
56
+
57
+ # chosen virtual mocap markers that are "keypoints" to work with
58
+ KEYPT_VERTS = [
59
+ 4404,
60
+ 920,
61
+ 3076,
62
+ 3169,
63
+ 823,
64
+ 4310,
65
+ 1010,
66
+ 1085,
67
+ 4495,
68
+ 4569,
69
+ 6615,
70
+ 3217,
71
+ 3313,
72
+ 6713,
73
+ 6785,
74
+ 3383,
75
+ 6607,
76
+ 3207,
77
+ 1241,
78
+ 1508,
79
+ 4797,
80
+ 4122,
81
+ 1618,
82
+ 1569,
83
+ 5135,
84
+ 5040,
85
+ 5691,
86
+ 5636,
87
+ 5404,
88
+ 2230,
89
+ 2173,
90
+ 2108,
91
+ 134,
92
+ 3645,
93
+ 6543,
94
+ 3123,
95
+ 3024,
96
+ 4194,
97
+ 1306,
98
+ 182,
99
+ 3694,
100
+ 4294,
101
+ 744,
102
+ ]
103
+
104
+
105
+ """
106
+ Openpose
107
+ """
108
+ OP_NUM_JOINTS = 25
109
+ # OP_IGNORE_JOINTS = [1, 9, 12] # neck and left/right hip
110
+ OP_IGNORE_JOINTS = [1] # neck
111
+ OP_EDGE_LIST = [
112
+ [1, 8],
113
+ [1, 2],
114
+ [1, 5],
115
+ [2, 3],
116
+ [3, 4],
117
+ [5, 6],
118
+ [6, 7],
119
+ [8, 9],
120
+ [9, 10],
121
+ [10, 11],
122
+ [8, 12],
123
+ [12, 13],
124
+ [13, 14],
125
+ [1, 0],
126
+ [0, 15],
127
+ [15, 17],
128
+ [0, 16],
129
+ [16, 18],
130
+ [14, 19],
131
+ [19, 20],
132
+ [14, 21],
133
+ [11, 22],
134
+ [22, 23],
135
+ [11, 24],
136
+ ]
137
+ # indices to map an openpose detection to its flipped version
138
+ OP_FLIP_MAP = [
139
+ 0,
140
+ 1,
141
+ 5,
142
+ 6,
143
+ 7,
144
+ 2,
145
+ 3,
146
+ 4,
147
+ 8,
148
+ 12,
149
+ 13,
150
+ 14,
151
+ 9,
152
+ 10,
153
+ 11,
154
+ 16,
155
+ 15,
156
+ 18,
157
+ 17,
158
+ 22,
159
+ 23,
160
+ 24,
161
+ 19,
162
+ 20,
163
+ 21,
164
+ ]
165
+
166
+
167
#
# From https://github.com/vchoutas/smplify-x/blob/master/smplifyx/utils.py
# Please see license for usage restrictions.
#
def smpl_to_openpose(
    model_type="smplx",
    use_hands=True,
    use_face=True,
    use_face_contour=False,
    openpose_format="coco25",
):
    """Returns the indices of the permutation that maps SMPL to OpenPose

    Parameters
    ----------
    model_type: str, optional
        The type of SMPL-like model that is used. The default mapping
        returned is for the SMPLX model
    use_hands: bool, optional
        Flag for adding to the returned permutation the mapping for the
        hand keypoints. Defaults to True
    use_face: bool, optional
        Flag for adding to the returned permutation the mapping for the
        face keypoints. Defaults to True
    use_face_contour: bool, optional
        Flag for appending the facial contour keypoints. Defaults to False
    openpose_format: bool, optional
        The output format of OpenPose. For now only COCO-25 and COCO-19 is
        supported. Defaults to 'coco25'

    """
    if openpose_format.lower() == "coco25":
        if model_type == "smpl":
            # SMPL has no hand/face joints; the single body mapping suffices
            return np.array(
                [24, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 8, 1, 4, 7,
                 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
                dtype=np.int32,
            )
        elif model_type == "smplh":
            parts = [
                np.array(
                    [52, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 8, 1, 4, 7,
                     53, 54, 55, 56, 57, 58, 59, 60, 61, 62],
                    dtype=np.int32,
                )
            ]
            if use_hands:
                parts.append(
                    np.array(
                        [20, 34, 35, 36, 63, 22, 23, 24, 64, 25, 26, 27,
                         65, 31, 32, 33, 66, 28, 29, 30, 67],
                        dtype=np.int32,
                    )
                )
                parts.append(
                    np.array(
                        [21, 49, 50, 51, 68, 37, 38, 39, 69, 40, 41, 42,
                         70, 46, 47, 48, 71, 43, 44, 45, 72],
                        dtype=np.int32,
                    )
                )
            return np.concatenate(parts)
        # SMPLX
        elif model_type == "smplx":
            parts = [
                np.array(
                    [55, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 8, 1, 4, 7,
                     56, 57, 58, 59, 60, 61, 62, 63, 64, 65],
                    dtype=np.int32,
                )
            ]
            if use_hands:
                parts.append(
                    np.array(
                        [20, 37, 38, 39, 66, 25, 26, 27, 67, 28, 29, 30,
                         68, 34, 35, 36, 69, 31, 32, 33, 70],
                        dtype=np.int32,
                    )
                )
                parts.append(
                    np.array(
                        [21, 52, 53, 54, 71, 40, 41, 42, 72, 43, 44, 45,
                         73, 49, 50, 51, 74, 46, 47, 48, 75],
                        dtype=np.int32,
                    )
                )
            if use_face:
                # 51 face keypoints, plus 17 contour points when requested
                parts.append(
                    np.arange(76, 127 + 17 * use_face_contour, dtype=np.int32)
                )
            return np.concatenate(parts)
        else:
            raise ValueError("Unknown model type: {}".format(model_type))
    elif openpose_format == "coco19":
        if model_type == "smpl":
            return np.array(
                [24, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 8, 1, 4, 7, 25, 26, 27, 28],
                dtype=np.int32,
            )
        elif model_type == "smplh":
            parts = [
                np.array(
                    [52, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 8, 1, 4, 7, 53, 54, 55, 56],
                    dtype=np.int32,
                )
            ]
            if use_hands:
                parts.append(
                    np.array(
                        [20, 34, 35, 36, 57, 22, 23, 24, 58, 25, 26, 27,
                         59, 31, 32, 33, 60, 28, 29, 30, 61],
                        dtype=np.int32,
                    )
                )
                parts.append(
                    np.array(
                        [21, 49, 50, 51, 62, 37, 38, 39, 63, 40, 41, 42,
                         64, 46, 47, 48, 65, 43, 44, 45, 66],
                        dtype=np.int32,
                    )
                )
            return np.concatenate(parts)
        # SMPLX
        elif model_type == "smplx":
            parts = [
                np.array(
                    [55, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 8, 1, 4, 7, 56, 57, 58, 59],
                    dtype=np.int32,
                )
            ]
            if use_hands:
                parts.append(
                    np.array(
                        [20, 37, 38, 39, 60, 25, 26, 27, 61, 28, 29, 30,
                         62, 34, 35, 36, 63, 31, 32, 33, 64],
                        dtype=np.int32,
                    )
                )
                parts.append(
                    np.array(
                        [21, 52, 53, 54, 65, 40, 41, 42, 66, 43, 44, 45,
                         67, 49, 50, 51, 68, 46, 47, 48, 69],
                        dtype=np.int32,
                    )
                )
            if use_face:
                parts.append(
                    np.arange(70, 70 + 51 + 17 * use_face_contour, dtype=np.int32)
                )
            return np.concatenate(parts)
        else:
            raise ValueError("Unknown model type: {}".format(model_type))
    else:
        raise ValueError("Unknown joint format: {}".format(openpose_format))
slahmr/slahmr/body_model/utils.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from .specs import SMPL_JOINTS
3
+
4
+
5
def run_smpl(body_model, trans, root_orient, body_pose, betas=None):
    """
    Forward pass of the SMPL body model.

    The wrapped body model has a fixed batch size (body_model.bm.batch_size),
    so the inputs are expanded/zero-padded along time to fill it and the
    outputs are cropped back to the requested T frames.

    trans : B x T x 3 root translations
    root_orient : B x T x 3 root orientations (axis-angle)
    body_pose : B x T x J*3 body joint rotations (axis-angle)
    betas : (optional) B x D shape coefficients; zeros when omitted

    Returns a dict with "joints" (B, T, J, 3), "vertices" (B, T, V, 3),
    and "faces" (template mesh faces).
    """
    B, T, _ = trans.shape
    bm_batch_size = body_model.bm.batch_size
    # the model's fixed batch must factor into B sequences of seq_len frames
    assert bm_batch_size % B == 0
    seq_len = bm_batch_size // B
    bm_num_betas = body_model.bm.num_betas
    J_BODY = len(SMPL_JOINTS) - 1  # all joints except root
    if T == 1:
        # must expand to use with body model
        trans = trans.expand(B, seq_len, 3)
        root_orient = root_orient.expand(B, seq_len, 3)
        body_pose = body_pose.expand(B, seq_len, J_BODY * 3)
    elif T != seq_len:
        # pad the temporal dimension with zeros up to seq_len frames
        trans, root_orient, body_pose = zero_pad_tensors(
            [trans, root_orient, body_pose], seq_len - T
        )
    if betas is None:
        betas = torch.zeros(B, bm_num_betas, device=trans.device)
    # broadcast the per-sequence betas across all frames
    betas = betas.reshape((B, 1, bm_num_betas)).expand((B, seq_len, bm_num_betas))
    smpl_body = body_model(
        pose_body=body_pose.reshape((B * seq_len, -1)),
        pose_hand=None,
        betas=betas.reshape((B * seq_len, -1)),
        root_orient=root_orient.reshape((B * seq_len, -1)),
        trans=trans.reshape((B * seq_len, -1)),
    )
    # crop the padded frames back to the requested length T
    return {
        "joints": smpl_body.Jtr.reshape(B, seq_len, -1, 3)[:, :T],
        "vertices": smpl_body.v.reshape(B, seq_len, -1, 3)[:, :T],
        "faces": smpl_body.f,
    }
45
+
46
+
47
def zero_pad_tensors(pad_list, pad_size):
    """
    Zero-pad tensors along the temporal (second) dimension.

    Assumes tensors in pad_list are B x T x D and pads the temporal dimension.

    :param pad_list: list of tensors, each of shape B x T_i x D_i
    :param pad_size: number of zero frames to append along dim 1
    :returns: new list of tensors of shape B x (T_i + pad_size) x D_i
    """
    B = pad_list[0].size(0)
    new_pad_list = []
    # NOTE: the index from enumerate was unused; iterate tensors directly
    for pad_tensor in pad_list:
        # .to(pad_tensor) matches both dtype and device of the source tensor
        padding = torch.zeros((B, pad_size, pad_tensor.size(2))).to(pad_tensor)
        new_pad_list.append(torch.cat([pad_tensor, padding], dim=1))
    return new_pad_list
slahmr/slahmr/confs/config.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - data: posetrack
3
+ - optim
4
+ - _self_
5
+
6
+ model:
7
+ floor_type: "shared"
8
+ est_floor: False
9
+ use_init: True
10
+ opt_cams: False
11
+ opt_scale: True
12
+ async_tracks: True
13
+
14
+ overwrite: False
15
+ run_opt: False
16
+ run_vis: False
17
+ vis:
18
+ phases:
19
+ - motion_chunks
20
+ - input
21
+ render_views:
22
+ - src_cam
23
+ - above
24
+ - side
25
+ make_grid: True
26
+ overwrite: False
27
+
28
+ paths:
29
+ smpl: _DATA/body_models/smplh/neutral/model.npz
30
+ smpl_kid: _DATA/body_models/smpl_kid_template.npy
31
+ vposer: _DATA/body_models/vposer_v1_0
32
+ init_motion_prior: _DATA/humor_ckpts/init_state_prior_gmm
33
+ humor: _DATA/humor_ckpts/humor/best_model.pth
34
+
35
+ humor:
36
+ in_rot_rep: "mat"
37
+ out_rot_rep: "aa"
38
+ latent_size: 48
39
+ model_data_config: "smpl+joints+contacts"
40
+ steps_in: 1
41
+
42
+ fps: 30
43
+ log_root: ../outputs/logs
44
+ log_dir: ${log_root}/${data.type}-${data.split}
45
+ exp_name: ${now:%Y-%m-%d}
46
+
47
+ hydra:
48
+ job:
49
+ chdir: True
50
+ run:
51
+ dir: ${log_dir}/${exp_name}/${data.name}
slahmr/slahmr/confs/data/3dpw.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ type: 3dpw
2
+ split: test
3
+ seq: downtown_arguing_00
4
+ root: /path/to/3DPW
5
+ use_cams: True
6
+ split_cameras: False
7
+ camera_name: cameras_intrins_split
8
+ shot_idx: 0
9
+ start_idx: 0
10
+ end_idx: 100
11
+ track_ids: "longest-2"
12
+ sources:
13
+ images: ${data.root}/imageFiles/${data.seq}
14
+ cameras: ${data.root}/slahmr/${data.camera_name}/${data.seq}/${data.start_idx}-${data.end_idx}
15
+ intrins: ${data.root}/slahmr/cameras_gt/${data.seq}/intrinsics.txt
16
+ tracks: ${data.root}/slahmr/track_preds/${data.seq}
17
+ shots: ${data.root}/slahmr/shot_idcs/${data.seq}.json
18
+ name: ${data.seq}-${data.track_ids}-${data.start_idx}-${data.end_idx}
slahmr/slahmr/confs/data/3dpw_gt.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ type: 3dpw_gt
2
+ split: test
3
+ seq: downtown_runForBus_00
4
+ root: /path/to/3DPW
5
+ use_cams: True
6
+ split_cameras: False
7
+ camera_name: cameras_intrins_split
8
+ shot_idx: 0
9
+ start_idx: 0
10
+ end_idx: 100
11
+ track_ids: "longest-2"
12
+ sources:
13
+ images: ${data.root}/imageFiles/${data.seq}
14
+ cameras: ${data.root}/slahmr/${data.camera_name}/${data.seq}/${data.start_idx}-${data.end_idx}
15
+ intrins: ${data.root}/slahmr/cameras_gt/${data.seq}/intrinsics.txt
16
+ tracks: ${data.root}/slahmr/track_gt/${data.seq}
17
+ shots: ${data.root}/slahmr/shot_idcs/${data.seq}.json
18
+ name: ${data.seq}-${data.track_ids}-${data.start_idx}-${data.end_idx}
slahmr/slahmr/confs/data/custom.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ type: custom
2
+ split: val
3
+ video: ""
4
+ seq: ""
5
+ root: /path/to/custom
6
+ use_cams: True
7
+ track_ids: "all"
8
+ shot_idx: 0
9
+ start_idx: 0
10
+ end_idx: 200
11
+ split_cameras: True
12
+ name: ${data.seq}-${data.track_ids}-shot-${data.shot_idx}
13
+ sources:
14
+ images: ${data.root}/images/${data.seq}
15
+ cameras: ${data.root}/slahmr/cameras/${data.seq}/shot-${data.shot_idx}
16
+ track: ${data.root}/slahmr/track_preds/${data.seq}
17
+ shots: ${data.root}/slahmr/shot_idcs/${data.seq}.json
slahmr/slahmr/confs/data/davis.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ type: davis
2
+ split: all
3
+ seq: parkour
4
+ root: /path/to/DAVIS
5
+ use_cams: True
6
+ track_ids: "all"
7
+ shot_idx: 0
8
+ start_idx: 0
9
+ end_idx: -1
10
+ split_cameras: True
11
+ name: ${data.seq}-${data.track_ids}-shot-${data.shot_idx}
12
+ sources:
13
+ images: ${data.root}/JPEGImages/Full-Resolution/${data.seq}
14
+ cameras: ${data.root}/slahmr/cameras/${data.seq}/shot-${data.shot_idx}
15
+ tracks: ${data.root}/slahmr/track_preds/${data.seq}
16
+ shots: ${data.root}/slahmr/shot_idcs/${data.seq}.json
slahmr/slahmr/confs/data/egobody.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ type: egobody
2
+ split: val
3
+ seq: recording_20210921_S11_S10_01
4
+ root: /path/to/egobody
5
+ use_cams: True
6
+ camera_name: cameras_intrins_split
7
+ shot_idx: 0
8
+ start_idx: 0
9
+ end_idx: 100
10
+ split_cameras: False
11
+ track_ids: "all"
12
+ sources:
13
+ images: ${data.root}/egocentric_color/${data.seq}/**/PV
14
+ cameras: ${data.root}/slahmr/${data.camera_name}/${data.seq}/${data.start_idx}-${data.end_idx}
15
+ intrins: ${data.root}/slahmr/cameras_gt/${data.seq}/intrinsics.txt
16
+ tracks: ${data.root}/slahmr/track_preds/${data.seq}
17
+ shots: ${data.root}/slahmr/shot_idcs/${data.seq}.json
18
+ name: ${data.seq}-${data.track_ids}-${data.start_idx}-${data.end_idx}
slahmr/slahmr/confs/data/posetrack.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ type: posetrack
2
+ split: val
3
+ seq: 014286_mpii_train
4
+ root: /path/to/posetrack
5
+ use_cams: True
6
+ track_ids: "all"
7
+ shot_idx: 0
8
+ start_idx: 0
9
+ end_idx: -1
10
+ split_cameras: True
11
+ name: ${data.seq}-${data.track_ids}-shot-${data.shot_idx}
12
+ track_name: track_preds
13
+ sources:
14
+ images: ${data.root}/images/${data.split}/${data.seq}
15
+ cameras: ${data.root}/slahmr/${data.split}/cameras/${data.seq}/shot-${data.shot_idx}
16
+ tracks: ${data.root}/slahmr/${data.split}/${data.track_name}/${data.seq}
17
+ shots: ${data.root}/slahmr/${data.split}/shot_idcs/${data.seq}.json
slahmr/slahmr/confs/data/video.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ type: video
2
+ split: val
3
+ root: /path/to/data # put your videos in root/videos/vid.mp4
4
+ video_dir: videos
5
+ seq: basketball
6
+ ext: mp4
7
+ src_path: ${data.root}/${data.video_dir}/${data.seq}.${data.ext}
8
+ frame_opts:
9
+ ext: jpg
10
+ fps: 25
11
+ start_sec: 0
12
+ end_sec: -1
13
+ use_cams: True
14
+ track_ids: "all"
15
+ shot_idx: 0
16
+ start_idx: 0
17
+ end_idx: 180
18
+ split_cameras: True
19
+ name: ${data.seq}-${data.track_ids}-shot-${data.shot_idx}-${data.start_idx}-${data.end_idx}
20
+ sources:
21
+ images: ${data.root}/images/${data.seq}
22
+ cameras: ${data.root}/slahmr/cameras/${data.seq}/shot-${data.shot_idx}
23
+ tracks: ${data.root}/slahmr/track_preds/${data.seq}
24
+ shots: ${data.root}/slahmr/shot_idcs/${data.seq}.json
slahmr/slahmr/confs/init.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - data: posetrack
3
+ - _self_
4
+
5
+ gap: 1
6
+ log_root: outputs
7
+ save_per_frame: False
8
+ print_err: False
9
+ stride: 48
10
+
11
+ hydra:
12
+ run:
13
+ dir: ${log_root}/init/${data.type}-${data.seq}-${data.depth_dir}-${data.fov}fov-gap${gap}
slahmr/slahmr/confs/optim.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ optim:
2
+ options:
3
+ robust_loss_type: "bisquare"
4
+ robust_tuning_const: 4.6851
5
+ joints2d_sigma: 100.0
6
+ lr: 1.0
7
+ lbfgs_max_iter: 20
8
+ save_every: 20
9
+ vis_every: -1
10
+ max_chunk_steps: 20
11
+ save_meshes: False
12
+
13
+ root:
14
+ num_iters: 30
15
+
16
+ smpl:
17
+ num_iters: 0
18
+
19
+ smooth:
20
+ opt_scale: False
21
+ num_iters: 60
22
+
23
+ motion_chunks:
24
+ chunk_size: 10
25
+ init_steps: 20
26
+ chunk_steps: 20
27
+ opt_cams: True
28
+
29
+ loss_weights:
30
+ joints2d: [0.001, 0.001, 0.001]
31
+ bg2d: [0.0, 0.000, 0.000]
32
+ cam_R_smooth : [0.0, 0.0, 0.0]
33
+ cam_t_smooth : [0.0, 0.0, 0.0]
34
+ # bg2d: [0.0, 0.0001, 0.0001]
35
+ # cam_R_smooth : [0.0, 1000.0, 1000.0]
36
+ # cam_t_smooth : [0.0, 1000.0, 1000.0]
37
+ joints3d: [0.0, 0.0, 0.0]
38
+ joints3d_smooth: [1.0, 10.0, 0.0]
39
+ joints3d_rollout: [0.0, 0.0, 0.0]
40
+ verts3d: [0.0, 0.0, 0.0]
41
+ points3d: [0.0, 0.0, 0.0]
42
+ pose_prior: [0.04, 0.04, 0.04]
43
+ shape_prior: [0.05, 0.05, 0.05]
44
+ motion_prior: [0.0, 0.0, 0.075]
45
+ init_motion_prior: [0.0, 0.0, 0.075]
46
+ joint_consistency: [0.0, 0.0, 100.0]
47
+ bone_length: [0.0, 0.0, 2000.0]
48
+ contact_vel: [0.0, 0.0, 100.0]
49
+ contact_height: [0.0, 0.0, 10.0]
50
+ floor_reg: [0.0, 0.0, 0.0]
51
+ # floor_reg: [0.0, 0.0, 0.167]
slahmr/slahmr/data/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .dataset import *
2
+ from . import tools
slahmr/slahmr/data/dataset.py ADDED
@@ -0,0 +1,438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import typing
4
+
5
+ import imageio
6
+ import numpy as np
7
+ import json
8
+
9
+ import torch
10
+ import torch.nn.functional as F
11
+ from torch.utils.data import Dataset
12
+
13
+ from body_model import OP_NUM_JOINTS, SMPL_JOINTS
14
+ from util.logger import Logger
15
+ from geometry.camera import invert_camera
16
+
17
+ from .tools import read_keypoints, read_mask_path, load_smpl_preds
18
+ from .vidproc import preprocess_cameras, preprocess_frames, preprocess_tracks
19
+
20
+
21
"""
Define data-related constants
"""
# Ground plane parameters (normal xyz, offset); used as the default
# floor_plane observation for every track.
DEFAULT_GROUND = np.array([0.0, -1.0, 0.0, -0.5])

# XXX: TEMPORARY CONSTANTS
SHOT_PAD = 0  # frames to drop on each side of a shot boundary
MIN_SEQ_LEN = 20  # minimum number of frames for an optimizable shot
MAX_NUM_TRACKS = 12  # cap on the number of tracks kept per shot
MIN_TRACK_LEN = 20  # minimum detected frames for a track to be kept
MIN_KEYP_CONF = 0.4  # keypoints below this confidence are zeroed out
32
+
33
+
34
def get_dataset_from_cfg(cfg):
    """
    Build a MultiPeopleDataset from a hydra config.

    Expands glob patterns in cfg.data.sources, runs any missing video
    preprocessing (frames/tracks/cameras for "video" data), and constructs
    the dataset over the configured frame interval and track selection.

    :param cfg: config with a `data` namespace (see confs/data/*.yaml)
    """
    args = cfg.data
    if not args.use_cams:
        # empty camera source makes the dataset fall back to static cameras
        args.sources.cameras = ""

    args.sources = expand_source_paths(args.sources)
    print("DATA SOURCES", args.sources)
    check_data_sources(args)
    return MultiPeopleDataset(
        args.sources,
        args.seq,
        tid_spec=args.track_ids,
        shot_idx=args.shot_idx,
        start_idx=int(args.start_idx),
        end_idx=int(args.end_idx),
        split_cameras=args.get("split_cameras", True),
    )
51
+
52
+
53
def expand_source_paths(data_sources):
    """Resolve every data-source glob pattern to a concrete path."""
    resolved = {}
    for key, pattern in data_sources.items():
        resolved[key] = get_data_source(pattern)
    return resolved
55
+
56
+
57
def get_data_source(source):
    """
    Resolve a path or glob pattern to a single existing path.

    Returns the pattern unchanged when nothing matches (callers rely on
    default values downstream); raises ValueError when it is ambiguous.
    """
    matches = glob.glob(source)
    if not matches:
        print(f"{source} does not exist")
        # return anyway for default values
        return source
    if len(matches) > 1:
        raise ValueError(f"{source} is not unique")
    return matches[0]
65
+
66
+
67
def check_data_sources(args):
    """
    Run any missing preprocessing for a raw video data source.

    For "video" data: extracts frames from the source video, runs tracking
    on the frames, and estimates cameras; each step is skipped when its
    output directory is already populated.
    """
    if args.type == "video":
        preprocess_frames(args.sources.images, args.src_path, **args.frame_opts)
        preprocess_tracks(args.sources.images, args.sources.tracks, args.sources.shots)
        preprocess_cameras(args, overwrite=args.get("overwrite_cams", False))
72
+
73
+
74
class MultiPeopleDataset(Dataset):
    """
    Dataset of per-person observation tracks for one shot of a video.

    Each item is one person track: 2D keypoints, per-frame SMPL
    initializations, visibility mask, and a floor plane. Camera data is
    shared across tracks and accessed via get_camera_data().
    """

    def __init__(
        self,
        data_sources: typing.Dict,
        seq_name,
        tid_spec="all",
        shot_idx=0,
        start_idx=0,
        end_idx=-1,
        pad_shot=False,
        split_cameras=True,
    ):
        """
        :param data_sources: dict of named source paths
            (images, tracks, shots, cameras, ...)
        :param seq_name: name of the sequence
        :param tid_spec: track selection: "all", "longest-<k>", or a
            dash-separated list of integer track ids
        :param shot_idx: index of the shot to select
        :param start_idx: first frame (within the shot) to use
        :param end_idx: end frame (exclusive); <= 0 means use all frames
        :param pad_shot: whether to drop frames around shot boundaries
        :param split_cameras: whether camera files cover only this interval
        """
        self.seq_name = seq_name
        self.data_sources = data_sources
        self.split_cameras = split_cameras

        # select only images in the desired shot
        img_files, _ = get_shot_img_files(
            self.data_sources["shots"], shot_idx, pad_shot
        )
        end_idx = end_idx if end_idx > 0 else len(img_files)
        self.data_start, self.data_end = start_idx, end_idx
        img_files = img_files[start_idx:end_idx]
        self.img_names = [get_name(f) for f in img_files]
        self.num_imgs = len(self.img_names)

        img_dir = self.data_sources["images"]
        assert os.path.isdir(img_dir)
        self.img_paths = [os.path.join(img_dir, f) for f in img_files]
        # read one image to get the working resolution (width, height)
        img_h, img_w = imageio.imread(self.img_paths[0]).shape[:2]
        self.img_size = img_w, img_h
        print(f"USING TOTAL {self.num_imgs} {img_w}x{img_h} IMGS")

        # find the tracks in the video
        track_root = self.data_sources["tracks"]
        if tid_spec == "all" or tid_spec.startswith("longest"):
            n_tracks = MAX_NUM_TRACKS
            if tid_spec.startswith("longest"):
                n_tracks = int(tid_spec.split("-")[1])
            # get the longest tracks in the selected shot
            track_ids = sorted(os.listdir(track_root))
            track_paths = [
                [f"{track_root}/{tid}/{name}_keypoints.json" for name in self.img_names]
                for tid in track_ids
            ]
            # track length = number of frames with a keypoint file on disk
            track_lens = [
                len(list(filter(os.path.isfile, paths))) for paths in track_paths
            ]
            # keep tracks longer than MIN_TRACK_LEN, longest first
            track_ids = [
                track_ids[i]
                for i in np.argsort(track_lens)[::-1]
                if track_lens[i] > MIN_TRACK_LEN
            ]
            print("TRACK LENGTHS", track_ids, track_lens)
            track_ids = track_ids[:n_tracks]
        else:
            # explicit dash-separated ids, zero-padded to match dir names
            track_ids = [f"{int(tid):03d}" for tid in tid_spec.split("-")]

        print("TRACK IDS", track_ids)

        self.track_ids = track_ids
        self.n_tracks = len(track_ids)
        self.track_dirs = [os.path.join(track_root, tid) for tid in track_ids]

        # keep a list of frame index masks of whether a track is available in a frame
        sidx = np.inf
        eidx = -1
        self.track_vis_masks = []
        for pred_dir in self.track_dirs:
            kp_paths = [f"{pred_dir}/{x}_keypoints.json" for x in self.img_names]
            has_kp = [os.path.isfile(x) for x in kp_paths]

            # keep track of which frames this track is visible in
            vis_mask = np.array(has_kp)
            idcs = np.where(vis_mask)[0]
            if len(idcs) > 0:
                si, ei = min(idcs), max(idcs)
                sidx = min(sidx, si)
                eidx = max(eidx, ei)
            self.track_vis_masks.append(vis_mask)

        # NOTE(review): if no track has any keypoints, sidx stays np.inf —
        # confirm upstream guarantees at least one visible frame
        eidx = max(eidx + 1, 0)
        sidx = min(sidx, eidx)
        print("START", sidx, "END", eidx)
        self.start_idx = sidx
        self.end_idx = eidx
        self.seq_len = eidx - sidx
        # all tracks share the same overall [sidx, eidx) interval
        self.seq_intervals = [(sidx, eidx) for _ in track_ids]

        self.sel_img_paths = self.img_paths[sidx:eidx]
        self.sel_img_names = self.img_names[sidx:eidx]

        # used to cache data
        self.data_dict = {}
        self.cam_data = None

    def __len__(self):
        # one item per person track
        return self.n_tracks

    def load_data(self, interp_input=True):
        """
        Load and cache per-track observations (keypoints, SMPL inits,
        visibility, floor) plus the shared camera data. No-op if cached.

        :param interp_input: interpolate SMPL predictions through occlusions
        """
        if len(self.data_dict) > 0:
            return

        # load camera data
        self.load_camera_data()
        # get data for each track
        data_out = {
            "mask_paths": [],
            "floor_plane": [],
            "joints2d": [],
            "vis_mask": [],
            "track_interval": [],
            "init_body_pose": [],
            "init_root_orient": [],
            "init_trans": [],
        }

        # create batches of sequences
        # each batch is a track for a person
        T = self.seq_len
        sidx, eidx = self.start_idx, self.end_idx
        for i, tid in enumerate(self.track_ids):
            # load mask of visible frames for this track
            vis_mask = self.track_vis_masks[i][sidx:eidx]  # (T)
            vis_idcs = np.where(vis_mask)[0]
            track_s, track_e = min(vis_idcs), max(vis_idcs) + 1
            data_out["track_interval"].append([track_s, track_e])

            # -1 out of scene / 0 occluded / 1 visible
            vis_mask = get_ternary_mask(vis_mask)
            data_out["vis_mask"].append(vis_mask)

            # load 2d keypoints for visible frames
            kp_paths = [
                f"{self.track_dirs[i]}/{x}_keypoints.json" for x in self.sel_img_names
            ]
            # (T, J, 3) (x, y, conf)
            joints2d_data = np.stack(
                [read_keypoints(p) for p in kp_paths], axis=0
            ).astype(np.float32)
            # Discard bad ViTPose detections
            joints2d_data[
                np.repeat(joints2d_data[:, :, [2]] < MIN_KEYP_CONF, 3, axis=2)
            ] = 0
            data_out["joints2d"].append(joints2d_data)

            # load single image smpl predictions
            pred_paths = [
                f"{self.track_dirs[i]}/{x}_smpl.json" for x in self.sel_img_names
            ]
            pose_init, orient_init, trans_init, _ = load_smpl_preds(
                pred_paths, interp=interp_input
            )

            n_joints = len(SMPL_JOINTS) - 1
            data_out["init_body_pose"].append(pose_init[:, :n_joints, :])
            data_out["init_root_orient"].append(orient_init)
            data_out["init_trans"].append(trans_init)

            # plane point = normal * offset
            data_out["floor_plane"].append(DEFAULT_GROUND[:3] * DEFAULT_GROUND[3:])

        self.data_dict = data_out

    def __getitem__(self, idx):
        """Return the observation dict for track `idx` (loads lazily)."""
        if len(self.data_dict) < 1:
            self.load_data()

        obs_data = dict()

        # 2D keypoints
        joint2d_data = self.data_dict["joints2d"][idx]
        obs_data["joints2d"] = torch.Tensor(joint2d_data)

        # single frame predictions
        obs_data["init_body_pose"] = torch.Tensor(self.data_dict["init_body_pose"][idx])
        obs_data["init_root_orient"] = torch.Tensor(
            self.data_dict["init_root_orient"][idx]
        )
        obs_data["init_trans"] = torch.Tensor(self.data_dict["init_trans"][idx])

        # floor plane
        obs_data["floor_plane"] = torch.Tensor(self.data_dict["floor_plane"][idx])

        # the frames the track is visible in
        obs_data["vis_mask"] = torch.Tensor(self.data_dict["vis_mask"][idx])

        # the frames used in this subsequence
        obs_data["seq_interval"] = torch.Tensor(list(self.seq_intervals[idx])).to(
            torch.int
        )
        # the start and end interval of available keypoints
        obs_data["track_interval"] = torch.Tensor(
            self.data_dict["track_interval"][idx]
        ).int()

        obs_data["track_id"] = int(self.track_ids[idx])
        obs_data["seq_name"] = self.seq_name
        return obs_data

    def load_camera_data(self):
        """Load the shared CameraData for the selected frame interval."""
        cam_dir = self.data_sources["cameras"]
        data_interval = 0, -1
        if self.split_cameras:
            # cameras were estimated only on the [data_start, data_end) slice
            data_interval = self.data_start, self.data_end
        track_interval = self.start_idx, self.end_idx
        self.cam_data = CameraData(
            cam_dir, self.seq_len, self.img_size, data_interval, track_interval
        )

    def get_camera_data(self):
        """Return camera tensors as a dict; requires load_camera_data first."""
        if self.cam_data is None:
            raise ValueError
        return self.cam_data.as_dict()
286
+
287
+
288
class CameraData(object):
    """
    Per-sequence camera extrinsics and intrinsics.

    Loads world-to-camera poses from a cameras.npz file when present,
    otherwise falls back to static identity cameras with a heuristic focal.
    """

    def __init__(
        self, cam_dir, seq_len, img_size, data_interval=[0, -1], track_interval=[0, -1]
    ):
        """
        :param cam_dir: directory expected to contain cameras.npz
        :param seq_len: number of frames in the full sequence
        :param img_size: (width, height) of the working images
        :param data_interval: [start, end) slice the cameras were computed on
        :param track_interval: [start, end) of the track within that slice
        NOTE: the list defaults are never mutated here.
        """
        self.img_size = img_size
        self.cam_dir = cam_dir

        # inclusive exclusive
        data_start, data_end = data_interval
        if data_end < 0:
            # negative end wraps around (-1 -> seq_len)
            data_end += seq_len + 1
        data_len = data_end - data_start

        # start and end indices are with respect to the data interval
        sidx, eidx = track_interval
        if eidx < 0:
            eidx += data_len + 1
        # convert to absolute frame indices
        self.sidx, self.eidx = sidx + data_start, eidx + data_start
        self.seq_len = self.eidx - self.sidx

        self.load_data()

    def load_data(self):
        """Populate cam_R, cam_t, intrins, is_static from disk or defaults."""
        # camera info
        sidx, eidx = self.sidx, self.eidx
        img_w, img_h = self.img_size
        fpath = os.path.join(self.cam_dir, "cameras.npz")
        if os.path.isfile(fpath):
            Logger.log(f"Loading cameras from {fpath}...")
            cam_R, cam_t, intrins, width, height = load_cameras_npz(fpath)
            # rescale intrinsics if cameras were estimated at another resolution
            scale = img_w / width
            self.intrins = scale * intrins[sidx:eidx]
            # move first camera to origin
            # R0, t0 = invert_camera(cam_R[sidx], cam_t[sidx])
            # self.cam_R = torch.einsum("ij,...jk->...ik", R0, cam_R[sidx:eidx])
            # self.cam_t = t0 + torch.einsum("ij,...j->...i", R0, cam_t[sidx:eidx])
            # t0 = -cam_t[sidx:eidx].mean(dim=0) + torch.randn(3) * 0.1
            # NOTE(review): t0 is the NEGATED first translation plus noise, so
            # subtracting it below effectively ADDS cam_t[sidx] — confirm that
            # this recentering (rather than cam_t - cam_t[sidx]) is intended
            t0 = -cam_t[sidx:sidx+1] + torch.randn(3) * 0.1
            self.cam_R = cam_R[sidx:eidx]
            self.cam_t = cam_t[sidx:eidx] - t0
            self.is_static = False
        else:
            Logger.log(f"WARNING: {fpath} does not exist, using static cameras...")
            # heuristic focal length from the image dimensions
            default_focal = 0.5 * (img_h + img_w)
            self.intrins = torch.tensor(
                [default_focal, default_focal, img_w / 2, img_h / 2]
            )[None].repeat(self.seq_len, 1)

            self.cam_R = torch.eye(3)[None].repeat(self.seq_len, 1, 1)
            self.cam_t = torch.zeros(self.seq_len, 3)
            self.is_static = True

        Logger.log(f"Images have {img_w}x{img_h}, intrins {self.intrins[0]}")
        print("CAMERA DATA", self.cam_R.shape, self.cam_t.shape, self.intrins[0])

    def world2cam(self):
        """Return world-to-camera rotations (T, 3, 3) and translations (T, 3)."""
        return self.cam_R, self.cam_t

    def cam2world(self):
        """Return the inverse (camera-to-world) rotations and translations."""
        R = self.cam_R.transpose(-1, -2)
        # t' = -R^T t
        t = -torch.einsum("bij,bj->bi", R, self.cam_t)
        return R, t

    def as_dict(self):
        """Package the camera tensors for consumers expecting a plain dict."""
        return {
            "cam_R": self.cam_R,  # (T, 3, 3)
            "cam_t": self.cam_t,  # (T, 3)
            "intrins": self.intrins,  # (T, 4)
            "static": self.is_static,  # bool
        }
358
+
359
+
360
def get_ternary_mask(vis_mask):
    """
    Convert a boolean visibility mask into a ternary float mask.

    -1 = track out of scene (before first / after last detection),
     0 = occluded (inside the track interval but not detected),
     1 = visible.
    """
    mask = torch.as_tensor(vis_mask).float()
    visible = torch.where(mask > 0)[0]
    # the track interval spans first to last visible frame (inclusive)
    first = int(visible.min())
    last = int(visible.max()) + 1
    mask[:first] = -1
    mask[last:] = -1
    return mask
370
+
371
+
372
def get_shot_img_files(shots_path, shot_idx, shot_pad=SHOT_PAD):
    """
    Select the image names belonging to one shot of the video.

    :param shots_path: json file mapping image name -> shot index
    :param shot_idx: shot to select
    :param shot_pad: if > 0, drop this many frames next to shot boundaries
    :returns: (sorted selected image names, their frame indices)
    :raises ValueError: if the padded shot is shorter than MIN_SEQ_LEN
    """
    assert os.path.isfile(shots_path)
    with open(shots_path, "r") as f:
        shots_dict = json.load(f)
    img_names = sorted(shots_dict.keys())
    N = len(img_names)
    shot_mask = np.array([shots_dict[x] == shot_idx for x in img_names])

    idcs = np.where(shot_mask)[0]
    if shot_pad > 0:  # drop the frames before/after shot change
        # only pad the front if the shot does not start the video
        if min(idcs) > 0:
            idcs = idcs[shot_pad:]
        # only pad the back if the shot does not end the video
        if len(idcs) > 0 and max(idcs) < N - 1:
            idcs = idcs[:-shot_pad]
        if len(idcs) < MIN_SEQ_LEN:
            raise ValueError("shot is too short for optimization")

    # NOTE(review): this rebuilt mask is not returned or read — confirm unused
    shot_mask = np.zeros(N, dtype=bool)
    shot_mask[idcs] = 1
    sel_paths = [img_names[i] for i in idcs]
    print(f"FOUND {len(idcs)}/{len(shots_dict)} FRAMES FOR SHOT {shot_idx}")
    return sel_paths, idcs
394
+
395
+
396
def load_cameras_npz(camera_path):
    """
    Load camera extrinsics and intrinsics from a .npz archive.

    Expected keys: "height", "width", "focal", "w2c" (N, 4, 4), and
    optionally "intrins" (N, 4) as (fx, fy, cx, cy).

    :returns: cam_R (N, 3, 3), cam_t (N, 3), intrins (N, 4), width, height
    """
    assert os.path.splitext(camera_path)[-1] == ".npz"

    archive = np.load(camera_path)
    height = int(archive["height"])
    width = int(archive["width"])
    focal = float(archive["focal"])

    w2c = torch.from_numpy(archive["w2c"])  # (N, 4, 4) world-to-camera
    num_cams = len(w2c)
    cam_R = w2c[:, :3, :3]  # (N, 3, 3)
    cam_t = w2c[:, :3, 3]  # (N, 3)

    if "intrins" in archive:
        intrins = torch.from_numpy(archive["intrins"].astype(np.float32))
    else:
        # fall back to a shared pinhole guess centered on the image
        shared = [focal, focal, width / 2, height / 2]
        intrins = torch.tensor(shared)[None].repeat(num_cams, 1)

    print(f"Loaded {num_cams} cameras")
    return cam_R, cam_t, intrins, width, height
418
+
419
+
420
def is_image(x):
    """True for non-hidden .png/.jpg filenames."""
    if x.startswith("."):
        return False
    return x.endswith(".png") or x.endswith(".jpg")


def get_name(x):
    """Basename of a path without its extension."""
    base = os.path.basename(x)
    return os.path.splitext(base)[0]


def split_name(x, suffix):
    """Basename of a path with a trailing suffix removed."""
    return os.path.basename(x).split(suffix)[0]


def get_names_in_dir(d, suffix):
    """Sorted names of files in directory d ending with suffix."""
    matches = glob.glob(f"{d}/*{suffix}")
    return sorted(split_name(m, suffix) for m in matches)


def batch_join(parent, names, suffix=""):
    """Join each name (plus suffix) onto a parent directory."""
    return [os.path.join(parent, f"{n}{suffix}") for n in names]
slahmr/slahmr/data/tools.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import functools
4
+
5
+ import numpy as np
6
+
7
+ from body_model import OP_NUM_JOINTS
8
+ from scipy.interpolate import interp1d
9
+ from scipy.spatial.transform import Rotation, Slerp
10
+
11
+
12
def read_keypoints(keypoint_fn):
    """
    Read OpenPose-format 2D body keypoints of the first person only.

    :param keypoint_fn: path to a *_keypoints.json file
    :returns: (OP_NUM_JOINTS, 3) float array of (x, y, conf); all zeros when
        the file is missing or contains no people
    """
    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented equivalent (float64)
    empty_kps = np.zeros((OP_NUM_JOINTS, 3), dtype=float)
    if not os.path.isfile(keypoint_fn):
        return empty_kps

    with open(keypoint_fn) as keypoint_file:
        data = json.load(keypoint_file)

    if len(data["people"]) == 0:
        print("WARNING: Found no keypoints in %s! Returning zeros!" % (keypoint_fn))
        return empty_kps

    person_data = data["people"][0]
    body_keypoints = np.array(person_data["pose_keypoints_2d"], dtype=float)
    body_keypoints = body_keypoints.reshape([-1, 3])
    return body_keypoints
31
+
32
+
33
def read_mask_path(path):
    """
    Read the segmentation mask path from a keypoint json file, if recorded.

    :param path: path to a *_keypoints.json file
    :returns: the first person's "mask_path" entry, or None if the file is
        missing, has no people, or no mask path is recorded
    """
    if not os.path.isfile(path):
        return None

    with open(path, "r") as f:
        # BUG FIX: json.load was previously called with the filename string
        # instead of the open file handle, raising AttributeError at runtime
        data = json.load(f)

    people = data.get("people", [])
    if not people:
        # previously indexed people[0] unconditionally -> IndexError
        return None
    return people[0].get("mask_path")
46
+
47
+
48
def read_smpl_preds(pred_path, num_betas=10):
    """
    Read one frame's SMPL prediction (exported from PHALP outputs).

    Missing files and missing fields fall back to zeros.

    :param pred_path: path to a *_smpl.json file (may not exist)
    :param num_betas: number of shape coefficients expected
    :returns: body_pose (23, 3), global orientation (3,),
        translation (3,), betas (num_betas,)
    """
    values = {
        "body_pose": np.zeros((23, 3)),
        "global_orient": np.zeros(3),
        "cam_trans": np.zeros(3),
        "betas": np.zeros(num_betas),
    }
    if os.path.isfile(pred_path):
        with open(pred_path, "r") as f:
            data = json.load(f)
        for key in values:
            if key in data:
                values[key] = np.array(data[key], dtype=np.float32)

    return (
        values["body_pose"],
        values["global_orient"],
        values["cam_trans"],
        values["betas"],
    )
77
+
78
+
79
def load_smpl_preds(pred_paths, interp=True, num_betas=10):
    """
    Load per-frame SMPL predictions for a track, optionally interpolating
    through missing (occluded) frames.

    Rotations are interpolated with spherical linear interpolation (Slerp);
    translations and betas with linear interpolation. Frames outside the
    first/last visible frame keep their zero defaults.

    :param pred_paths: per-frame *_smpl.json paths (missing files allowed)
    :param interp: whether to fill gaps between visible frames
    :param num_betas: number of shape coefficients
    :returns: pose (N, 23, 3), orient (N, 3), trans (N, 3), betas (N, num_betas)
    """
    vis_mask = np.array([os.path.isfile(x) for x in pred_paths])
    vis_idcs = np.where(vis_mask)[0]

    # load single image smpl predictions
    stack_fnc = functools.partial(np.stack, axis=0)
    # (N, 23, 3), (N, 3), (N, 3), (N, 10)
    pose, orient, trans, betas = map(
        stack_fnc, zip(*[read_smpl_preds(p, num_betas=num_betas) for p in pred_paths])
    )
    if not interp:
        return pose, orient, trans, betas

    # interpolate the occluded tracks
    # NOTE(review): assumes at least two visible frames — Slerp/interp1d
    # raise otherwise; confirm callers filter such tracks upstream
    orient_slerp = Slerp(vis_idcs, Rotation.from_rotvec(orient[vis_idcs]))
    trans_interp = interp1d(vis_idcs, trans[vis_idcs], axis=0)
    betas_interp = interp1d(vis_idcs, betas[vis_idcs], axis=0)

    # only fill within the visible span; outside stays zero
    tmin, tmax = min(vis_idcs), max(vis_idcs) + 1
    times = np.arange(tmin, tmax)
    orient[times] = orient_slerp(times).as_rotvec()
    trans[times] = trans_interp(times)
    betas[times] = betas_interp(times)

    # interpolate for each joint angle
    for i in range(pose.shape[1]):
        pose_slerp = Slerp(vis_idcs, Rotation.from_rotvec(pose[vis_idcs, i]))
        pose[times, i] = pose_slerp(times).as_rotvec()

    return pose, orient, trans, betas
slahmr/slahmr/data/vidproc.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import subprocess
4
+
5
+ import preproc.launch_phalp as phalp
6
+ from preproc.launch_slam import split_frames_shots, get_command, check_intrins
7
+ from preproc.extract_frames import video_to_frames
8
+
9
+
10
def is_nonempty(d):
    """Return True iff d is an existing directory containing at least one entry."""
    if not os.path.isdir(d):
        return False
    return len(os.listdir(d)) > 0
12
+
13
+
14
def preprocess_frames(img_dir, src_path, overwrite=False, **kwargs):
    """
    Extract frames from the src_path video into img_dir.
    Skipped if img_dir already contains frames and overwrite is False.
    Extra kwargs are forwarded to video_to_frames.
    """
    if not overwrite and is_nonempty(img_dir):
        print(f"FOUND {len(os.listdir(img_dir))} FRAMES in {img_dir}")
        return
    print(f"EXTRACTING FRAMES FROM {src_path} TO {img_dir}")
    print(kwargs)
    # video_to_frames is expected to return 0 on success (asserted below)
    out = video_to_frames(src_path, img_dir, overwrite=overwrite, **kwargs)
    assert out == 0, "FAILED FRAME EXTRACTION"
22
+
23
+
24
def preprocess_tracks(img_dir, track_dir, shot_dir, overwrite=False):
    """
    Run PHALP tracking on img_dir.
    Skipped if track_dir already contains output and overwrite is False.
    :param img_dir
    :param track_dir, expected format: res_root/track_name/sequence
    :param shot_dir, expected format: res_root/shot_name/sequence
    """
    if not overwrite and is_nonempty(track_dir):
        print(f"FOUND TRACKS IN {track_dir}")
        return

    print(f"RUNNING PHALP ON {img_dir}")
    # recover res_root / track_name / sequence from the track_dir layout
    track_root, seq = os.path.split(track_dir.rstrip("/"))
    res_root, track_name = os.path.split(track_root)
    shot_name = shot_dir.rstrip("/").split("/")[-2]
    # default to device 0 when CUDA_VISIBLE_DEVICES is unset
    gpu = os.environ.get("CUDA_VISIBLE_DEVICES", 0)

    phalp.process_seq(
        [gpu],
        seq,
        img_dir,
        f"{res_root}/phalp_out",
        track_name=track_name,
        shot_name=shot_name,
        overwrite=overwrite,
    )
49
+
50
+
51
def preprocess_cameras(cfg, overwrite=False):
    """
    Run SLAM on the configured shot/segment to estimate cameras.
    Skipped if cfg.sources.cameras already contains output and overwrite
    is False. Asserts if the SLAM subprocess fails.
    """
    if not overwrite and is_nonempty(cfg.sources.cameras):
        print(f"FOUND CAMERAS IN {cfg.sources.cameras}")
        return

    print(f"RUNNING SLAM ON {cfg.seq}")
    img_dir = cfg.sources.images
    map_dir = cfg.sources.cameras
    # split the sequence into shots and locate the requested one
    subseqs, shot_idcs = split_frames_shots(cfg.sources.images, cfg.sources.shots)
    shot_idx = np.where(shot_idcs == cfg.shot_idx)[0][0]
    # run on selected shot
    start, end = subseqs[shot_idx]
    if not cfg.split_cameras:
        # only run on specified segment within shot
        end = start + cfg.end_idx
        start = start + cfg.start_idx
    intrins_path = cfg.sources.get("intrins", None)
    if intrins_path is not None:
        intrins_path = check_intrins(cfg.type, cfg.root, intrins_path, cfg.seq, cfg.split)

    cmd = get_command(
        img_dir,
        map_dir,
        start=start,
        end=end,
        intrins_path=intrins_path,
        overwrite=overwrite,
    )
    print(cmd)
    # run the SLAM command on the same GPU as this process
    gpu = os.environ.get("CUDA_VISIBLE_DEVICES", 0)
    out = subprocess.call(f"CUDA_VISIBLE_DEVICES={gpu} {cmd}", shell=True)
    assert out == 0, "SLAM FAILED"
slahmr/slahmr/eval/__init__.py ADDED
File without changes
slahmr/slahmr/eval/associate.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import json
4
+ import joblib
5
+ import numpy as np
6
+ import torch
7
+
8
+ from data.tools import read_keypoints
9
+
10
+
11
def associate_phalp_track_dirs(
    phalp_dir, img_dir, track_ids, gt_kps, start=0, end=-1, debug=False
):
    """
    Associate the M track_ids with G GT tracks
    returns (M, T) array of best matching GT person index (-1 where unmatched)
    :param phalp_dir (str) directory with phalp track folders
    :param img_dir (str) directory with source images
    :param track_ids (list) M tracks to match
    :param gt_kps (G, T, 3, J) gt keypoints for G people, T times, J joints
        (each frame is transposed to (J, 3) below)
    :param start (optional int default 0)
    :param end (optional int default -1)
    """
    img_names = sorted([os.path.splitext(x)[0] for x in os.listdir(img_dir)])
    N = len(img_names)
    # negative end counts from the back (end=-1 keeps through the last frame)
    end = N + 1 + end if end < 0 else end
    sel_imgs = img_names[start:end]
    G, T = gt_kps.shape[:2]  # G num people, T num frames
    assert len(sel_imgs) == T, f"found {len(sel_imgs)} frames, expected {T}"

    # phalp track folders are zero-padded 3-digit ids
    track_ids = [f"{int(tid):03d}" for tid in track_ids]
    M = len(track_ids)
    # find the best matching GT track for each PHALP track
    match_idcs = torch.full((M, T), -1)
    for t, frame_name in enumerate(sel_imgs):
        track_kps = []  # get track keypoints
        for tid in track_ids:
            kp_path = f"{phalp_dir}/{tid}/{frame_name}_keypoints.json"
            track_kps.append(read_keypoints(kp_path))
        track_kps = np.stack(track_kps, axis=0)  # (M, 25, 3)
        for g in range(G):
            kp_gt = gt_kps[g, t].T.numpy()  # (18, 3)
            m = associate_keypoints(kp_gt, track_kps, debug=debug)
            if m == -1:
                continue
            # NOTE(review): a later GT person can overwrite an earlier
            # person's match for the same track in this frame
            match_idcs[m, t] = g
    return match_idcs
48
+
49
+
50
def associate_phalp_track_data(
    phalp_file, track_ids, gt_kps, start=0, end=-1, debug=False
):
    """
    Get the best GT person for each phalp track
    :param phalp_file (path) to phalp result pickle file
    :param gt_kps (G, T, 3, 18) gt keypoints
    :param track_ids (list) of phalp track ids
    :param start (optional int)
    :param end (optional int)
    return (M, T) array the matching GT person index for each phalp track
        (-1 where unmatched)
    """
    data = joblib.load(phalp_file)
    img_names = sorted(data.keys())
    N = len(img_names)  # number of frames
    # negative end counts from the back (end=-1 keeps through the last frame)
    end = N + 1 + end if end < 0 else end
    sel_imgs = img_names[start:end]

    G, T = gt_kps.shape[:2]  # G num people, T num frames
    assert len(sel_imgs) == T, f"found {len(sel_imgs)} frames, expected {T}"

    M = len(track_ids)
    # map each track id to its row in the output array
    track_idcs = {tid: m for m, tid in enumerate(track_ids)}
    # get the best matching GT track for each PHALP track
    match_idcs = torch.full((M, T), -1)
    for t, frame_name in enumerate(sel_imgs):
        frame_data = data[frame_name]
        for g in range(G):
            kp_gt = gt_kps[g, t].T.numpy()  # (18, 3)
            # get the best track ID for the GT person
            tid = associate_frame_dict(frame_data, kp_gt, track_ids, debug=debug)
            if tid == -1:
                continue
            m = track_idcs[tid]
            match_idcs[m, t] = g
    return match_idcs
86
+
87
+
88
def associate_keypoints(gt_kps, track_kps, debug=False):
    """
    Find the track whose keypoint bounding box best overlaps the GT keypoints.
    :param gt_kps (25, 3) keypoints with confidence in the last column
    :param track_kps (M, 25, 3)
    return the index of the best overlapping track bbox,
        or -1 if the GT frame has no confident keypoints
    """
    # keep only confident GT keypoints
    gt_kps = gt_kps[gt_kps[:, 2] > 0, :2]
    if len(gt_kps) < 1:
        return -1
    # tight bbox around the confident GT keypoints
    bb_min, bb_max = gt_kps.min(axis=0), gt_kps.max(axis=0)
    gt_bbox = np.concatenate([bb_min, bb_max], axis=-1)  # (4,)

    track_kps = track_kps[..., :2]  # (M, 25, 2)
    track_min, track_max = track_kps.min(axis=1), track_kps.max(axis=1)
    track_bboxes = np.concatenate([track_min, track_max], axis=-1)  # (M, 4)

    # NOTE(review): returns an index even when the best IOU is 0
    ious = np.stack([compute_iou(bb, gt_bbox)[0] for bb in track_bboxes], axis=0)
    return np.argmax(ious)
106
+
107
+
108
def associate_frame_dict(frame_data, gt_kps, track_ids, debug=False):
    """
    For the GT keypoints, find the PHALP track in track_ids with best overlap
    :param frame_data (dict) PHALP output data
    :param gt_kps (25, 3)
    :param track_ids (list of N) PHALP track ids to search over
    return the id in track_ids with the biggest overlap with gt_kps,
        or -1 if the GT frame has no confident keypoints
    """
    gt_kps = gt_kps[gt_kps[:, 2] > 0, :2]
    if len(gt_kps) < 1:
        return -1
    bb_min, bb_max = gt_kps.min(axis=0), gt_kps.max(axis=0)
    gt_bbox = np.concatenate([bb_min, bb_max], axis=-1)  # (4,)

    # use strs for track ids
    # NOTE(review): tid_strs is built but never used below
    tid_strs = [str(tid) for tid in track_ids]
    # get the list indices of the PHALP tracks
    # NOTE(review): keys are str(int(tid)); if track_ids contains zero-padded
    # strings (e.g. "003") the membership test below never matches -- verify
    track_idcs = {
        str(int(tid)): i
        for i, tid in enumerate(frame_data["tid"])
        if tid in frame_data["tracked_ids"]
    }
    # select the track with the biggest overlap with the gt kps
    ious = []
    for tid in track_ids:
        if tid not in track_idcs:
            ious.append(0)
            continue
        bb = frame_data["bbox"][track_idcs[tid]]  # (min_x, min_y, w, h)
        # convert to (min_x, min_y, max_x, max_y)
        bbox = np.concatenate([bb[:2], bb[:2] + bb[2:]], axis=-1)
        iou = compute_iou(bbox, gt_bbox)[0]
        ious.append(iou)
    ious = np.stack(ious, axis=0)
    idx = np.argmax(ious)
    if debug:
        print(track_ids[idx], track_ids, ious)
    return track_ids[idx]
145
+
146
+
147
def compute_iou(bb1, bb2):
    """
    Intersection-over-union of axis-aligned boxes.
    :param bb1 (..., 4) top left x, y bottom right x y
    :param bb2 (..., 4) top left x, y bottom right x y
    return (...) IOU in [0, 1]
    """
    x11, y11, x12, y12 = np.split(bb1, 4, axis=-1)
    x21, y21, x22, y22 = np.split(bb2, 4, axis=-1)
    x1 = np.maximum(x11, x21)
    y1 = np.maximum(y11, y21)
    x2 = np.minimum(x12, x22)
    y2 = np.minimum(y12, y22)
    # fix: clamp each overlap dimension separately; clamping only the product
    # reported spurious overlap for boxes disjoint in BOTH x and y
    # (negative * negative > 0)
    intersect = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
    union = (x12 - x11) * (y12 - y11) + (x22 - x21) * (y22 - y21) - intersect
    return intersect / (union + 1e-6)
slahmr/slahmr/eval/egobody_utils.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import itertools
3
+ import glob
4
+ import pickle
5
+ import json
6
+ import pandas as pd
7
+
8
+ import numpy as np
9
+ import torch
10
+
11
+ from tools import load_body_model, move_to, detach_all, EGOBODY_ROOT
12
+
13
+
14
def get_sequence_body_info(seq_name):
    """Look up the 'body_idx_fpv' entry for a recording in the EgoBody info csv."""
    info_df = pd.read_csv(f"{EGOBODY_ROOT}/data_info_release.csv")
    matched = info_df[info_df["recording_name"] == seq_name]
    return matched["body_idx_fpv"].values[0]
19
+
20
+
21
def get_egobody_split(split):
    """Return the recording names of a data split, or [] if the split is unknown."""
    split_file = f"{EGOBODY_ROOT}/data_splits.csv"
    df = pd.read_csv(split_file)
    if split in df.columns:
        return df[split].dropna().tolist()
    print(f"{split} not in {split_file}")
    return []
28
+
29
+
30
def get_egobody_seq_paths(seq_name, start=0, end=-1):
    """
    Return the sorted image file NAMES (not full paths) of a sequence,
    sliced to [start:end].
    NOTE(review): end<0 maps to len(img_files) here, whereas
    get_egobody_keypoints uses len+1+end -- confirm intended semantics
    for negative end values other than -1.
    """
    img_dir = get_egobody_img_dir(seq_name)
    # img files are named [timestamp]_frame_[index].jpg
    img_files = sorted(os.listdir(img_dir))
    end = len(img_files) if end < 0 else end
    print(f"FOUND {len(img_files)} FILES FOR SEQ {seq_name}")
    return img_files[start:end]
37
+
38
+
39
def get_egobody_seq_names(seq_name, start=0, end=-1):
    """
    Return the frame names of a sequence: file stems with the leading
    timestamp component (before the first '_') removed.
    """
    names = []
    for fname in get_egobody_seq_paths(seq_name, start=start, end=end):
        stem = fname.split(".")[0]
        names.append("_".join(stem.split("_")[1:]))
    return names
43
+
44
+
45
def get_egobody_img_dir(seq_name):
    """Resolve the single PV image directory of an EgoBody recording via glob."""
    img_dir = f"{EGOBODY_ROOT}/egocentric_color/{seq_name}/**/PV"
    matches = glob.glob(img_dir)
    if len(matches) == 1:
        return matches[0]
    raise ValueError(f"{img_dir} has {len(matches)} matches!")
51
+
52
+
53
def get_egobody_keypoints(seq_name, start=0, end=-1):
    """
    Load the 2D keypoints and per-frame validity flags of a sequence.
    returns (kps, valid): kps stacked over the selected frames, valid a
    bool array; frames without stored annotations get zeros / False.
    """
    img_dir = os.path.dirname(get_egobody_img_dir(seq_name))
    kp_file = f"{img_dir}/keypoints.npz"
    valid_file = f"{img_dir}/valid_frame.npz"

    # missing keypoints aren't included, must fill in
    kp_dict = {}
    valid_dict = {}
    kp_data = np.load(kp_file)
    valid_data = np.load(valid_file)

    # zero template for frames without annotations
    zeros = np.zeros_like(kp_data["keypoints"][0])
    for img_path, kps in zip(kp_data["imgname"], kp_data["keypoints"]):
        img_name = os.path.basename(img_path)
        kp_dict[img_name] = kps

    for img_path, valid in zip(valid_data["imgname"], valid_data["valid"]):
        img_name = os.path.basename(img_path)
        valid_dict[img_name] = valid

    img_paths = sorted(glob.glob(f"{img_dir}/PV/*.jpg"))
    # negative end counts from the back (end=-1 keeps all frames)
    end = len(img_paths) + 1 + end if end < 0 else end
    img_names = [os.path.basename(x) for x in img_paths[start:end]]
    kps = np.stack([kp_dict.get(name, zeros) for name in img_names], axis=0)
    valid = np.stack([valid_dict.get(name, False) for name in img_names], axis=0)
    return kps, valid
79
+
80
+
81
def load_egobody_smpl_params(seq_name, start=0, end=-1):
    """
    Load the GT interactee SMPL parameters for each selected frame.
    returns dict with trans, pose_body, root_orient, betas each stacked to
    a (1, T, -1) tensor, plus genders (list with the single gender string).
    Raises ValueError if the expected SMPL result directory is missing.
    """
    frame_names = get_egobody_seq_names(seq_name, start=start, end=end)
    # body_idx_fpv is "<idx> <gender>"
    body_name = get_sequence_body_info(seq_name)
    body_idx, gender = body_name.split(" ")
    smpl_dir = (
        f"{EGOBODY_ROOT}/smpl_interactee_val/{seq_name}/body_idx_{body_idx}/results"
    )
    if not os.path.isdir(smpl_dir):
        raise ValueError(f"EXPECTED BODY DIR {smpl_dir} DOES NOT EXIST")

    print(f"LOADING {len(frame_names)} SMPL PARAMS FROM {smpl_dir}")
    smpl_dict = {"trans": [], "root_orient": [], "pose_body": [], "betas": []}
    for frame in frame_names:
        with open(f"{smpl_dir}/{frame}/000.pkl", "rb") as f:
            # data has global_orient, body_pose, betas, transl
            data = pickle.load(f)
        smpl_dict["trans"].append(torch.from_numpy(data["transl"]))
        smpl_dict["pose_body"].append(torch.from_numpy(data["body_pose"]))
        smpl_dict["root_orient"].append(torch.from_numpy(data["global_orient"]))
        smpl_dict["betas"].append(torch.from_numpy(data["betas"]))
    # concatenate over frames and add a leading person dimension
    smpl_dict = {k: torch.cat(v, dim=0)[None] for k, v in smpl_dict.items()}
    smpl_dict["genders"] = [gender]
    return smpl_dict
104
+
105
+
106
def load_egobody_intrinsics(seq_name, start=0, end=-1, ret_size_tuple=True):
    """
    Load per-frame intrinsics from a (T, 6) txt file: 4 intrinsic values
    followed by 2 image-size values per row.
    If ret_size_tuple, returns (intrins (T, 4) float tensor, image-size int
    list from the FIRST frame only, in on-disk column order); otherwise the
    per-frame sizes are returned as a (T, 2) int tensor.
    """
    path = f"{EGOBODY_ROOT}/slahmr/cameras_gt/{seq_name}/intrinsics.txt"
    assert os.path.isfile(path)
    intrins = np.loadtxt(path)  # (T, 6)
    end = len(intrins) if end < 0 else end
    intrins = intrins[start:end]
    if ret_size_tuple:
        img_size = intrins[0, 4:].astype(int).tolist()  # (2)
        intrins = torch.from_numpy(intrins[:, :4].astype(np.float32))
        return intrins, img_size
    img_size = torch.from_numpy(intrins[:, 4:].astype(int))
    intrins = torch.from_numpy(intrins[:, :4].astype(np.float32))
    return intrins, img_size
119
+
120
+
121
def load_egobody_gt_extrinsics(seq_name, start=0, end=-1, ret_4d=True):
    """
    Load the GT cam2world transforms of a sequence as a (T, 4, 4) tensor,
    or as (R (T, 3, 3), t (T, 3)) when ret_4d is False.
    """
    path = f"{EGOBODY_ROOT}/slahmr/cameras_gt/{seq_name}/cam2world.txt"
    assert os.path.isfile(path)
    mats = np.loadtxt(path).astype(np.float32)  # (T, 16)
    stop = len(mats) if end < 0 else end
    cam2world = torch.from_numpy(mats[start:stop].reshape(-1, 4, 4))
    if ret_4d:
        return cam2world
    return cam2world[:, :3, :3], cam2world[:, :3, 3]
130
+
131
+
132
def load_egobody_extrinsics(seq_name, use_intrins=True, start=0, end=-1):
    """
    Load estimated world2cam matrices, invert to cam2world, and return
    (R (T, 3, 3), t (T, 3)).
    :param use_intrins select the intrinsics-aware camera estimates
    """
    camera_name = "cameras_intrins" if use_intrins else "cameras_default"
    path = f"{EGOBODY_ROOT}/slahmr/{camera_name}/{seq_name}/cameras.npz"
    assert os.path.isfile(path)
    data = np.load(path)
    w2c = torch.from_numpy(data["w2c"].astype(np.float32))  # (N, 4, 4)
    end = len(w2c) if end < 0 else end
    w2c = w2c[start:end]
    # invert to camera-to-world
    c2w = torch.linalg.inv(w2c)
    return c2w[:, :3, :3], c2w[:, :3, 3]
142
+
143
+
144
def load_egobody_meshes(seq_name, device, start=0, end=-1):
    """
    Run the SMPL body model on the GT interactee parameters and return a
    dict with joints, vertices, and faces, detached and moved to cpu.
    """
    params = load_egobody_smpl_params(seq_name, start=start, end=end)
    _, T = params["trans"].shape[:2]

    with torch.no_grad():
        gender = params["genders"][0]
        body_model = load_body_model(T, "smpl", gender, device)
        smpl_res = body_model(
            trans=params["trans"][0].to(device),
            root_orient=params["root_orient"][0].to(device),
            betas=params["betas"][0].to(device),
            pose_body=params["pose_body"][0].to(device),
        )

    res = {"joints": smpl_res.Jtr, "vertices": smpl_res.v, "faces": smpl_res.f}
    return move_to(detach_all(res), "cpu")
160
+
161
+
162
def load_egobody_kinect2holo(seq_name, ret_4d=True):
    """
    Load the kinect12 -> holo transform (inverse of the stored holo ->
    kinect12 calibration). Returns a (4, 4) tensor, or (R (3, 3), t (3,))
    when ret_4d is False.
    """
    # load the transform from kinect12 to holo
    # bodies are recorded in the kinect12 frame
    path = f"{EGOBODY_ROOT}/calibrations/{seq_name}/cal_trans/holo_to_kinect12.json"
    with open(path, "r") as f:
        kinect2holo = np.linalg.inv(np.array(json.load(f)["trans"]))
    kinect2holo = torch.from_numpy(kinect2holo.astype(np.float32))
    if ret_4d:
        return kinect2holo
    return kinect2holo[:3, :3], kinect2holo[:3, 3]
slahmr/slahmr/eval/run_eval.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import joblib
4
+ import json
5
+ import pickle
6
+
7
+ import pandas as pd
8
+ import numpy as np
9
+ import torch
10
+
11
+ import egobody_utils as eb_util
12
+ from tools import (
13
+ load_body_model,
14
+ load_results_all,
15
+ local_align_joints,
16
+ global_align_joints,
17
+ first_align_joints,
18
+ compute_accel_norm,
19
+ run_smpl,
20
+ JointRegressor,
21
+ EGOBODY_ROOT,
22
+ TDPW_ROOT,
23
+ )
24
+ from associate import associate_phalp_track_dirs
25
+
26
+
27
def stack_torch(x_list, dim=0):
    """Convert numpy arrays to float32 torch tensors and stack them along dim."""
    converted = []
    for arr in x_list:
        converted.append(torch.from_numpy(arr.astype(np.float32)))
    return torch.stack(converted, dim=dim)
31
+
32
+
33
def load_3dpw_params(seq_name, start=0, end=-1):
    """
    Load GT SMPL parameters and 2D keypoints for a 3DPW test sequence.
    returns dict with root_orient (M, T, 3), pose_body (M, T, 69),
    trans (M, T, 3), betas (M, T, 10), keypts2d (M, T, 3, 18),
    valid (M, T) bool, genders (list of M strings)
    """
    seq_file = f"{TDPW_ROOT}/sequenceFiles/test/{seq_name}.pkl"
    with open(seq_file, "rb") as f:
        data = pickle.load(f, encoding="latin1")

    M = len(data["poses"])
    T = len(data["poses"][0])
    # negative end counts from the back (end=-1 keeps through the last frame)
    end = T + 1 + end if end < 0 else end
    T = end - start  # T is now the selected window length
    trans = stack_torch([x[start:end] for x in data["trans"]])  # (M, T, 3)
    poses = stack_torch([x[start:end] for x in data["poses"]])  # (M, T, 72)
    # betas are per-person constants; broadcast over time
    betas = stack_torch([x[None, :10] for x in data["betas"]]).expand(
        M, T, 10
    )  # (M, T, 10)
    keypts2d = stack_torch([x[start:end] for x in data["poses2d"]])  # (M, T, 3, 18)
    valid_cam = stack_torch(
        [x[start:end] for x in data["campose_valid"]]
    ).bool()  # (M, T)
    # a frame counts as valid only with a valid campose AND any keypoint
    valid_kp = (keypts2d.reshape(M, T, -1) > 0).any(dim=-1).bool()  # (M, T)
    valid = valid_cam & valid_kp
    genders = ["male" if x == "m" else "female" for x in data["genders"]]  # (M)
    return {
        "root_orient": poses[..., :3],
        "pose_body": poses[..., 3:],
        "trans": trans,
        "betas": betas,
        "keypts2d": keypts2d,
        "valid": valid,
        "genders": genders,
    }
63
+
64
+
65
def load_egobody_params(seq_name, start=0, end=-1):
    """
    Load GT EgoBody SMPL parameters plus 2D keypoints and validity flags.
    returns dict of
    - trans (1, T, 3)
    - root_orient (1, T, 3)
    - pose_body (1, T, 63)
    - betas (1, T, 10)
    - genders (list with one gender str)
    - keypts2d (1, T, J, 3)
    - valid (1, T)
    """
    smpl_dict = eb_util.load_egobody_smpl_params(seq_name, start=start, end=end)
    kps, valid = eb_util.get_egobody_keypoints(seq_name, start=start, end=end)
    # add a leading person dimension to match the smpl params
    smpl_dict["keypts2d"] = torch.from_numpy(kps.astype(np.float32))[None]
    smpl_dict["valid"] = torch.from_numpy(valid.astype(bool))[None]
    return smpl_dict
81
+
82
+
83
def eval_result_dir(
    dset_type, res_dir, out_path, joint_reg, dev_id=0, overwrite=False, debug=False
):
    """
    Evaluate one optimized result directory against GT and write a csv of
    per-phase metrics to out_path.
    :param dset_type (str) "egobody" or "3dpw"
    :param res_dir (str) result directory with track_info.json and one
        subdirectory per optimization phase
    :param out_path (str) csv output path
    :param joint_reg (JointRegressor) maps SMPL vertices to eval joints
    :param dev_id (int) cuda device index
    """
    if os.path.isfile(out_path) and not overwrite:
        print(f"{out_path} already exists, skipping.")
        return

    # get the output metadata
    track_file = f"{res_dir}/track_info.json"
    if not os.path.isfile(track_file):
        print(f"{track_file} does not exist, skipping")
        return

    with open(track_file, "r") as f:
        track_dict = json.load(f)
    start, end = track_dict["meta"]["data_interval"]
    seq_name = os.path.basename(res_dir).split("-")[0]
    print("EVALUATING", res_dir, seq_name, start, end)

    # get the associations from PHALP tracks to GT tracks
    track_info = track_dict["tracks"]
    track_ids = sorted(track_info, key=lambda k: track_info[k]["index"])
    print("TRACK IDS", track_ids)

    if dset_type == "egobody":
        # load the GT params
        gt_params = load_egobody_params(seq_name, start, end)
        phalp_dir = f"{EGOBODY_ROOT}/slahmr/track_preds/{seq_name}"
        img_dir = eb_util.get_egobody_img_dir(seq_name)
    elif dset_type == "3dpw":
        gt_params = load_3dpw_params(seq_name, start, end)
        phalp_dir = f"{TDPW_ROOT}/slahmr/track_gt/{seq_name}"
        img_dir = f"{TDPW_ROOT}/imageFiles/{seq_name}"
    else:
        raise NotImplementedError

    # (M, T) GT track index for each frame and each PHALP track
    match_idcs = associate_phalp_track_dirs(
        phalp_dir,
        img_dir,
        track_ids,
        gt_params["keypts2d"],
        start=start,
        end=end,
        debug=debug,
    )
    # M number of PHALP tracks
    M = len(track_ids)

    # get the GT joints by running the body model per GT person
    G, T = gt_params["pose_body"].shape[:2]
    device = torch.device(f"cuda:{dev_id}")
    gt_joints = []
    for g in range(G):
        body_model = load_body_model(T, "smpl", gt_params["genders"][g], device)
        gt_smpl = run_smpl(
            body_model,
            betas=gt_params["betas"][g].to(device),
            trans=gt_params["trans"][g].to(device),
            root_orient=gt_params["root_orient"][g].to(device),
            pose_body=gt_params["pose_body"][g].to(device),
        )
        gt_joints.append(joint_reg(gt_smpl["vertices"]))  # (T, J, 3)
    gt_joints = torch.stack(gt_joints, dim=0)
    J, D = gt_joints.shape[-2:]

    # select the correct GT person for each track
    gt_valid = gt_params["valid"]  # (G, T)
    idcs = match_idcs.clone().reshape(M, T, 1, 1).expand(-1, -1, J, D)
    idcs[idcs == -1] = 0  # gather dummy for invalid matches
    gt_match_joints = torch.gather(gt_joints, 0, idcs)
    gt_match_valid = torch.gather(gt_valid, 0, idcs[:, :, 0, 0])
    valid = gt_match_valid & (match_idcs != -1)

    # use the vis_mask to get the correct data subsequence
    vis_mask = torch.tensor(
        [track_info[tid]["vis_mask"] for tid in track_ids]
    )  # (M, T)
    vis_tracks = torch.where(vis_mask.any(dim=1))[0]  # (B,)
    vis_idcs = torch.where(vis_mask.any(dim=0))[0]
    sidx, eidx = vis_idcs.min(), vis_idcs.max() + 1
    L = eidx - sidx

    valid_seq = valid[vis_tracks, sidx:eidx]  # (B, L)
    gt_seq_joints = gt_match_joints[vis_tracks, sidx:eidx]  # (B, L, *)
    gt_seq_joints = gt_seq_joints[valid_seq]

    if debug:
        print(f"vis start {sidx}, end {eidx}, L {L}")
        print("valid track matches", (match_idcs != -1).sum())
        print("filtered gt joints", gt_seq_joints.shape)

    # get the outputs of each phase
    PHASES = ["root_fit", "smooth_fit", "motion_chunks"]
    metric_names = ["ga_jmse", "fa_jmse", "pampjpe", "acc_norm"]
    # fix: was `for _ in PHASE` (NameError) -- iterate the defined PHASES list
    phase_metrics = {name: [-1 for _ in PHASES] for name in metric_names}
    cur_metrics = {name: np.nan for name in metric_names}
    for i, phase in enumerate(PHASES):
        res_dict = load_results_all(os.path.join(res_dir, phase), device)
        if res_dict is None:
            print(f"PHASE {phase} did not optimize")
            # carry the previous phase's metrics forward for this phase
            for name in metric_names:
                phase_metrics[name][i] = float(cur_metrics[name])
            print(phase, phase_metrics)
            continue

        # (M, L, -1, 3) verts, (M, L) mask
        res_verts = res_dict["vertices"][valid_seq]
        res_joints = joint_reg(res_verts)  # (*, J, 3)

        for name in metric_names:
            # fix: these were independent `if`s with an `else` dangling off
            # the last one, so "pampjpe" and "ga_jmse" always raised
            # NotImplementedError -- must be an elif chain
            if name == "acc_norm":
                target = compute_accel_norm(gt_seq_joints)  # (T-2, J)
                pred = compute_accel_norm(res_joints)
            elif name == "pampjpe":
                target = gt_seq_joints
                pred = local_align_joints(gt_seq_joints, res_joints)
            elif name == "ga_jmse":
                target = gt_seq_joints
                pred = global_align_joints(gt_seq_joints, res_joints)
            elif name == "fa_jmse":
                target = gt_seq_joints
                pred = first_align_joints(gt_seq_joints, res_joints)
            else:
                raise NotImplementedError
            cur_metrics[name] = torch.linalg.norm(target - pred, dim=-1).mean()
            phase_metrics[name][i] = float(cur_metrics[name])
            print(phase, name, cur_metrics[name])

    df_dict = {"phases": PHASES}
    df_dict.update(phase_metrics)
    df = pd.DataFrame.from_dict(df_dict)
    df.to_csv(out_path, index=False)
    print(f"saved metrics to {out_path}")
217
+
218
+
219
def parse_job_file(args):
    """
    Parse job-spec lines of the form
    "<seq> data.start_idx=<s> data.end_idx=<e> [data.track_ids=<name>]"
    into result-directory names "<seq>-<track_name>-<start>-<end>".
    """
    subseq_names = []
    with open(args.job_file, "r") as f:
        for line in f.readlines():
            cmd_args = line.strip().split()
            seq_name, start_str, end_str = cmd_args[:3]
            start = start_str.split("=")[-1]
            end = end_str.split("=")[-1]
            # default track selection differs per dataset
            track_name = "longest-2" if args.dset_type == "3dpw" else "all"
            if len(cmd_args) > 3:
                track_name = cmd_args[3].split("=")[-1]
            subseq_names.append(f"{seq_name}-{track_name}-{start}-{end}")
    return subseq_names
232
+
233
+
234
def main(args):
    """Evaluate every subsequence listed in the job file, then aggregate metrics."""
    joint_reg = JointRegressor()
    out_root = args.out_root if args.out_root is not None else args.res_root
    os.makedirs(out_root, exist_ok=True)

    subseq_names = parse_job_file(args)
    for subseq in subseq_names:
        res_dir = os.path.join(args.res_root, subseq)
        out_path = os.path.join(out_root, f"{subseq}.txt")
        eval_result_dir(
            args.dset_type,
            res_dir,
            out_path,
            joint_reg,
            overwrite=args.overwrite,
            debug=args.debug,
        )

    # aggregate all per-subsequence metric files (the [!_] glob skips the
    # _final_metrics.txt output itself)
    metric_paths = glob.glob(f"{out_root}/[!_]*.txt")
    dfs = [pd.read_csv(path) for path in metric_paths]
    if not dfs:
        # pd.concat raises on an empty list; nothing was evaluated
        print("no metric files found, nothing to aggregate")
        return

    # fix: the per-subsequence csvs are written with a "phases" column
    # (see eval_result_dir), so grouping must use "phases", not "phase"
    merged = pd.concat(dfs).groupby("phases").mean()
    merged.to_csv(f"{out_root}/_final_metrics.txt")
    print(merged)
259
+
260
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--dset_type",
        required=True,
        choices=["egobody", "3dpw"],
        help="dataset to evaluate on, choices: (3dpw, egobody)",
    )
    parser.add_argument(
        "-i", "--res_root", required=True, help="root directory of outputs to evaluate"
    )
    parser.add_argument(
        "-f",
        "--job_file",
        required=True,
        help="job file specifying the examples to run and evaluate",
    )
    parser.add_argument(
        "-o",
        "--out_root",
        default=None,
        help="directory to save computed metrics, default is res_root",
    )
    parser.add_argument("-y", "--overwrite", action="store_true")
    # fix: "-d" was already taken by --dset_type; registering a duplicate
    # short option makes argparse raise ArgumentError at startup, so
    # --debug is long-option only
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    main(args)
slahmr/slahmr/eval/split_3dpw.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import itertools
4
+ import joblib
5
+
6
+ from eval_3dpw import load_3dpw_params
7
+ from associate import associate_frame
8
+ from tools import TDPW_ROOT
9
+
10
+
11
+ """
12
+ Script to find the associations of ground truth 3DPW tracks with the detected PHALP tracks
13
+ Will write a job specification file to ../job_specs with which track IDs to run optimization on
14
+ """
15
+
16
+ IMG_ROOT = f"{TDPW_ROOT}/imageFiles"
17
+ SRC_DIR = f"{TDPW_ROOT}/sequenceFiles"
18
+ PHALP_DIR = f"{TDPW_ROOT}/slahmr/phalp_out/results"
19
+
20
+
21
def load_split_sequences(split):
    """Return the sequence names (file stems) stored under SRC_DIR/<split>."""
    assert split in ["train", "val", "test"]
    seq_files = sorted(os.listdir(f"{SRC_DIR}/{split}"))
    return [os.path.splitext(name)[0] for name in seq_files]
26
+
27
+
28
def select_phalp_tracks(seq_name, split, start, end, debug=False):
    """
    Select the best phalp track for each GT person for each frame.
    Returns all phalp tracks that match GT over sequence.
    NOTE(review): this module imports `associate_frame` from `associate` and
    `load_3dpw_params` from `eval_3dpw`; the versions visible elsewhere in
    this repo are `associate_frame_dict` (associate.py) and a
    `load_3dpw_params` that takes a sequence NAME rather than a pkl path --
    verify these imports resolve as intended.
    """
    phalp_file = f"{PHALP_DIR}/{seq_name}.pkl"
    track_data = joblib.load(phalp_file)
    img_names = sorted(track_data.keys())
    sel_imgs = img_names[start:end]

    gt_params = load_3dpw_params(f"{SRC_DIR}/{split}/{seq_name}.pkl", start, end)
    gt_kps = gt_params["keypts2d"]
    G, T = gt_kps.shape[:2]  # G num people, T num frames
    assert len(sel_imgs) == T, f"found {len(sel_imgs)} frames, expected {T}"

    # collect every track id seen in the selected frames
    track_ids = set()
    for frame in sel_imgs:
        frame_data = track_data[frame]
        for tid in frame_data["tracked_ids"]:
            track_ids.add(str(tid))
    track_ids = list(track_ids)
    # NOTE(review): M and track_idcs are computed but never used below
    M = len(track_ids)
    track_idcs = {tid: m for m, tid in enumerate(track_ids)}

    # get the best matching PHALP track for each GT person
    sel_tracks = set()
    for t, frame_name in enumerate(sel_imgs):
        frame_data = track_data[frame_name]
        for g in range(G):
            kp_gt = gt_kps[g, t].T.numpy()  # (18, 3)
            # get the best track ID for the GT person
            tid = associate_frame(frame_data, kp_gt, track_ids, debug=debug)
            if tid == -1:
                continue
            sel_tracks.add(int(tid))
    return list(sel_tracks)
64
+
65
+
66
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--seq_len", type=int, default=100)
    parser.add_argument(
        "--split", default="test", choices=["train", "val", "test", "all"]
    )
    parser.add_argument("--prefix", default="3dpw")
    args = parser.parse_args()

    # NOTE(review): load_split_sequences asserts split in {train,val,test},
    # so --split all fails there despite being an accepted choice here
    seqs = load_split_sequences(args.split)

    job_arg_strs = []
    for seq in seqs:
        num_imgs = len(glob.glob(f"{IMG_ROOT}/{seq}/*.jpg"))
        # chunk the sequence into seq_len-frame windows
        splits = list(range(0, num_imgs, args.seq_len))
        splits[-1] = num_imgs  # just add the remainder to the last job
        for start, end in zip(splits[:-1], splits[1:]):
            sel_tracks = select_phalp_tracks(seq, args.split, start, end)
            if len(sel_tracks) < 1:
                continue
            track_str = "-".join([f"{tid:03d}" for tid in sel_tracks])
            arg_str = (
                f"{seq} data.start_idx={start} data.end_idx={end} "
                f"data.track_ids={track_str}"
            )
            print(arg_str)
            job_arg_strs.append(arg_str)

    with open(
        f"../job_specs/{args.prefix}_{args.split}_len_{args.seq_len}.txt", "w"
    ) as f:
        f.write("\n".join(job_arg_strs))
slahmr/slahmr/eval/split_egobody.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import joblib
4
+ import itertools
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+ from associate import associate_frame
10
+ from tools import EGOBODY_ROOT
11
+
12
+
13
+ """
14
+ Script to find the associations of ground truth Egobody tracks with the detected PHALP tracks
15
+ Will write a job specification file to ../job_specs with which track IDs to run optimization on
16
+ """
17
+
18
+ IMG_ROOT = f"{EGOBODY_ROOT}/egocentric_color"
19
+ PHALP_DIR = f"{EGOBODY_ROOT}/slahmr/phalp_out/results"
20
+
21
+
22
def load_split_sequences(split):
    """
    Return the recording names of a data split, or [] when the split column
    is missing from the splits csv.
    """
    # fix: the path was a plain string literal missing the f-prefix, so it
    # pointed at a literal "{EGOBODY_ROOT}/data_splits.csv" path
    split_file = f"{EGOBODY_ROOT}/data_splits.csv"
    df = pd.read_csv(split_file)
    if split not in df.columns:
        print(f"{split} not in {split_file}")
        return []
    return df[split].dropna().tolist()
29
+
30
+
31
def get_egobody_keypoints(img_dir, start, end):
    """
    Load 2D keypoints and per-frame validity for the frames in
    img_dir/PV sliced to [start:end]; frames without stored
    annotations get zeros / False.
    """
    kp_file = f"{img_dir}/keypoints.npz"
    valid_file = f"{img_dir}/valid_frame.npz"
    img_paths = sorted(glob.glob(f"{img_dir}/PV/*.jpg"))[start:end]
    img_names = [os.path.basename(x) for x in img_paths]

    kp_dict = {}
    valid_dict = {}
    kp_data = np.load(kp_file)
    valid_data = np.load(valid_file)

    # zero template for frames without annotations
    zeros = np.zeros_like(kp_data["keypoints"][0])
    for img_path, kps in zip(kp_data["imgname"], kp_data["keypoints"]):
        img_name = os.path.basename(img_path)
        kp_dict[img_name] = kps

    for img_path, valid in zip(valid_data["imgname"], valid_data["valid"]):
        img_name = os.path.basename(img_path)
        valid_dict[img_name] = valid

    kps = np.stack([kp_dict.get(name, zeros) for name in img_names], axis=0)
    valid = np.stack([valid_dict.get(name, False) for name in img_names], axis=0)
    return kps, valid
54
+
55
+
56
def select_phalp_tracks(seq_name, img_dir, start, end, debug=False):
    """
    Get the best phalp track for each GT person for each frame.
    Returns all phalp tracks that match GT over sequence.
    NOTE(review): this module imports `associate_frame` from `associate`;
    the version visible elsewhere in this repo is named
    `associate_frame_dict` -- verify the import resolves as intended.
    """
    phalp_file = f"{PHALP_DIR}/{seq_name}.pkl"
    track_data = joblib.load(phalp_file)
    img_names = sorted(track_data.keys())
    sel_imgs = img_names[start:end]

    kps_all, valid = get_egobody_keypoints(img_dir, start, end)
    T = len(kps_all)
    assert len(sel_imgs) == T, f"found {len(sel_imgs)} frames, expected {T}"

    # collect every track id seen in the selected frames
    track_ids = set()
    for frame in sel_imgs:
        frame_data = track_data[frame]
        for tid in frame_data["tracked_ids"]:
            track_ids.add(str(tid))
    track_ids = list(track_ids)
    # NOTE(review): M and track_idcs are computed but never used below
    M = len(track_ids)
    track_idcs = {tid: m for m, tid in enumerate(track_ids)}

    # get the best matching PHALP track for each GT person
    sel_tracks = set()
    for t, frame_name in enumerate(sel_imgs):
        frame_data = track_data[frame_name]
        # get the best track ID for the GT person
        tid = associate_frame(frame_data, kps_all[t], track_ids, debug=debug)
        if tid == -1:
            continue
        sel_tracks.add(int(tid))
    return list(sel_tracks)
89
+
90
+
91
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--seq_len", type=int, default=100)
    parser.add_argument("--split", default="val", choices=["train", "val", "test"])
    parser.add_argument("--prefix", default="ego")
    args = parser.parse_args()

    seqs = load_split_sequences(args.split)

    job_arg_strs = []
    for seq in seqs:
        # resolve the single recording subdirectory for this sequence
        img_dir = glob.glob(f"{IMG_ROOT}/{seq}/**/")[0]
        num_imgs = len(glob.glob(f"{img_dir}/PV/*.jpg"))
        # chunk the sequence into seq_len-frame windows
        splits = list(range(0, num_imgs, args.seq_len))
        splits[-1] = num_imgs  # just add the remainder to the last job
        for start, end in zip(splits[:-1], splits[1:]):
            sel_tracks = select_phalp_tracks(seq, img_dir, start, end)
            if len(sel_tracks) < 1:
                continue
            track_str = "-".join([f"{tid:03d}" for tid in sel_tracks])
            arg_str = (
                f"{seq} data.start_idx={start} data.end_idx={end} "
                f"data.track_ids={track_str}"
            )
            print(arg_str)
            job_arg_strs.append(arg_str)

    with open(
        f"../job_specs/{args.prefix}_{args.split}_len_{args.seq_len}_tracks.txt", "w"
    ) as f:
        f.write("\n".join(job_arg_strs))
slahmr/slahmr/eval/tools.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import glob
4
+ import json
5
+ import joblib
6
+ import numpy as np
7
+ import smplx
8
+ import torch
9
+
10
+ from util.loaders import load_smpl_body_model
11
+ from util.tensor import move_to, detach_all, to_torch
12
+ from optim.output import load_result, get_results_paths
13
+ from geometry.pcl import align_pcl
14
+ from geometry.rotation import batch_rodrigues
15
+
16
+ BASE_DIR = os.path.abspath(f"{__file__}/../../../")
17
+ JOINT_REG_PATH = f"{BASE_DIR}/_DATA/body_models/J_regressor_h36m.npy"
18
+
19
+
20
+ # XXX: Sorry, need to change this yourself
21
+ EGOBODY_ROOT = "/path/to/egobody"
22
+ TDPW_ROOT = "/path/to/3DPW"
23
+
24
+
25
class JointRegressor(object):
    """Regress the 14 standard H36M evaluation joints from SMPL vertices."""

    def __init__(self):
        # (17, 6890) H36M joint regressor over the SMPL vertex set,
        # loaded from disk at JOINT_REG_PATH
        R17 = torch.from_numpy(np.load(JOINT_REG_PATH).astype(np.float32))
        # (14,) adding the root, but will omit
        joint_map_h36m = torch.tensor([6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10])
        self.regressor = R17[joint_map_h36m]  # (14, 6890)

    def to(self, device):
        # moves the regressor weights in place; returns None (not chainable)
        self.regressor = self.regressor.to(device)

    def __call__(self, verts):
        """
        NOTE: RETURNS ROOT AS WELL
        :param verts (*, V, 3)
        returns (*, J, 3) 14 standard evaluation joints
        """
        return torch.einsum("nv,...vd->...nd", self.regressor, verts)  # (..., 14, 3)
43
+
44
+
45
+ def compute_accel_norm(joints):
46
+ """
47
+ :param joints (T, J, 3)
48
+ """
49
+ vel = joints[1:] - joints[:-1] # (T-1, J, 3)
50
+ acc = vel[1:] - vel[:-1] # (T-2, J, 3)
51
+ return torch.linalg.norm(acc, dim=-1)
52
+
53
+
54
def global_align_joints(gt_joints, pred_joints):
    """
    Align pred to gt with a single similarity transform fit over all frames.
    :param gt_joints (T, J, 3)
    :param pred_joints (T, J, 3)
    returns aligned predicted joints (T, J, 3)
    """
    # fit one global (scale, rotation, translation) over the stacked points
    scale, rot, trans = align_pcl(
        gt_joints.reshape(-1, 3), pred_joints.reshape(-1, 3)
    )
    rotated = torch.einsum("ij,tnj->tni", rot, pred_joints)
    return scale * rotated + trans[None, None]
66
+
67
+
68
def first_align_joints(gt_joints, pred_joints):
    """
    Align pred to gt with a similarity transform fit to the first two frames.
    :param gt_joints (T, J, 3)
    :param pred_joints (T, J, 3)
    returns aligned predicted joints (T, J, 3)
    """
    # fit a single transform over the stacked joints of frames 0 and 1
    # shapes: (1, 1), (1, 3, 3), (1, 3)
    scale, rot, trans = align_pcl(
        gt_joints[:2].reshape(1, -1, 3), pred_joints[:2].reshape(1, -1, 3)
    )
    rotated = torch.einsum("tij,tnj->tni", rot, pred_joints)
    return scale * rotated + trans[:, None]
82
+
83
+
84
def local_align_joints(gt_joints, pred_joints):
    """
    Procrustes-align pred to gt independently in every frame.
    :param gt_joints (T, J, 3)
    :param pred_joints (T, J, 3)
    returns aligned predicted joints (T, J, 3)
    """
    # per-frame (scale, rotation, translation)
    scale, rot, trans = align_pcl(gt_joints, pred_joints)
    rotated = torch.einsum("tij,tnj->tni", rot, pred_joints)
    return scale[:, None] * rotated + trans[:, None]
95
+
96
+
97
def load_body_model(batch_size, model_type, gender, device):
    """
    Construct a SMPL/SMPL+H body model for evaluation.
    :param batch_size number of body instances the model is built for
    :param model_type "smpl" or "smplh"
    :param gender body-model gender string, e.g. "neutral"
    :param device torch device
    returns the loaded body model
    """
    assert model_type in ["smpl", "smplh"]
    if model_type == "smpl":
        num_betas = 10
        ext = "pkl"
        use_vtx_selector = False
    else:
        # SMPL+H checkpoints ship as npz with 16 shape coefficients
        num_betas = 16
        ext = "npz"
        use_vtx_selector = True

    # NOTE(review): JOINT_REG_PATH above lives under _DATA/body_models while
    # this path is BASE_DIR/body_models — confirm both locations exist
    smpl_path = f"{BASE_DIR}/body_models/{model_type}/{gender}/model.{ext}"
    body_model, fit_gender = load_smpl_body_model(
        smpl_path,
        batch_size,
        num_betas,
        model_type=model_type,
        use_vtx_selector=use_vtx_selector,
        device=device,
    )
    return body_model
118
+
119
+
120
def run_smpl(body_model, *args, **kwargs):
    """
    Run the body model without gradients.
    returns dict of detached CPU tensors: joints (Jtr), vertices (v), faces (f)
    """
    with torch.no_grad():
        results = body_model(*args, **kwargs)
    return {
        "joints": results.Jtr.detach().cpu(),
        "vertices": results.v.detach().cpu(),
        "faces": results.f.detach().cpu(),
    }
128
+
129
+
130
def run_smpl_batch(body_model, device, **kwargs):
    """
    Run the body model on inputs with extra leading dims by flattening them
    into the model batch dim, then unflattening the outputs.
    :param body_model body model built with batch_size B (== product of each
        kwarg's leading dims)
    :param device torch device to run on
    :param kwargs tensors of shape (*leading, D)
    """
    model_kwargs = {}
    B = body_model.bm.batch_size
    kwarg_shape = (B,)
    for k, v in kwargs.items():
        # NOTE(review): kwarg_shape is overwritten every iteration, so all
        # kwargs are assumed to share the same leading shape — confirm
        kwarg_shape = v.shape[:-1]
        model_kwargs[k] = v.reshape(B, v.shape[-1]).to(device)
    res_flat = run_smpl(body_model, **model_kwargs)
    res = {}
    for k, v in res_flat.items():
        sh = v.shape
        # unflatten back to the input leading shape where the output is batched
        if sh[0] == B:
            v = v.reshape(*kwarg_shape, *sh[1:])
        res[k] = v
    return res
145
+
146
+
147
def cat_dicts(dict_list, dim=0):
    """
    Stack a list of dicts of tensors into a single dict of stacked tensors.
    All dicts must share the same key set.
    :param dict_list list of {key: tensor}
    :param dim (optional) dimension to stack along, default 0
    """
    ref_keys = set(dict_list[0].keys())
    for d in dict_list:
        assert set(d.keys()) == ref_keys
    out = {}
    for key in ref_keys:
        out[key] = torch.stack([d[key] for d in dict_list], dim=dim)
    return out
154
+
155
+
156
def load_results_all(phase_dir, device):
    """
    Load all the reconstructed tracks during optimization
    and run the body model on the final iteration's parameters.
    :param phase_dir directory of per-iteration optimization results
    :param device torch device
    returns dict of SMPL outputs with (B, T, ...) leading dims, or None when
        the optimization stopped before 20 iterations
    """
    res_path_dict = get_results_paths(phase_dir)
    max_iter = max(res_path_dict.keys())
    if int(max_iter) < 20:
        print("max_iter", max_iter)
        return None

    res = load_result(res_path_dict[max_iter])["world"]
    # results is dict with (B, T, *) tensors
    trans = res["trans"]
    B, T, _ = trans.shape
    root_orient = res["root_orient"]
    pose_body = res["pose_body"]
    # betas are per-track; broadcast them over time
    betas = res["betas"].reshape(B, 1, -1).expand(B, T, -1)
    body_model = load_body_model(B * T, "smplh", "neutral", device)
    return run_smpl_batch(
        body_model,
        device,
        trans=trans,
        root_orient=root_orient,
        betas=betas,
        pose_body=pose_body,
    )
slahmr/slahmr/geometry/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from . import camera
2
+ from . import mesh
3
+ from . import pcl
4
+ from . import plane
5
+ from . import rotation
slahmr/slahmr/geometry/camera.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+
4
+
5
def perspective_projection(
    points, focal_length, camera_center, rotation=None, translation=None
):
    """
    Adapted from https://github.com/mkocabas/VIBE/blob/master/lib/models/spin.py
    Project 3D points to the image plane with a pinhole camera.
    Input:
        points (bs, N, 3): 3D points
        focal_length (bs, 2): Focal length
        camera_center (bs, 2): Camera center
        rotation (bs, 3, 3): OPTIONAL Camera rotation
        translation (bs, 3): OPTIONAL Camera translation
    returns (bs, N, 2) pixel coordinates
    """
    bs = points.shape[0]
    # assemble the per-batch intrinsics matrix K
    K = torch.zeros(bs, 3, 3, device=points.device)
    K[:, 0, 0] = focal_length[:, 0]
    K[:, 1, 1] = focal_length[:, 1]
    K[:, 2, 2] = 1.0
    K[:, :2, 2] = camera_center

    # optionally move points into the camera frame
    if rotation is not None and translation is not None:
        points = torch.einsum("bij,bkj->bki", rotation, points)
        points = points + translation[:, None]

    # perspective divide, then apply intrinsics and drop the homogeneous coord
    normalized = points / points[..., 2:3]
    pixels = torch.einsum("bij,bkj->bki", K, normalized)
    return pixels[..., :2]
37
+
38
+
39
def reproject(points3d, cam_R, cam_t, cam_f, cam_center):
    """
    reproject points3d into the scene cameras
    :param points3d (B, T, N, 3)
    :param cam_R (B, T, 3, 3) world-to-camera rotations
    :param cam_t (B, T, 3) world-to-camera translations
    :param cam_f (T, 2) focal lengths
    :param cam_center (T, 2) principal points
    returns (B, T, N, 2) pixel coordinates
    """
    B, T, N, _ = points3d.shape
    # rotate and translate into each camera frame
    points3d = torch.einsum("btij,btnj->btni", cam_R, points3d)
    points3d = points3d + cam_t[..., None, :]  # (B, T, N, 3)
    # perspective divide, then per-frame intrinsics
    points2d = points3d[..., :2] / points3d[..., 2:3]
    points2d = cam_f[None, :, None] * points2d + cam_center[None, :, None]
    return points2d
54
+
55
+
56
def focal2fov(focal, R):
    """
    Field of view (radians) from focal length.
    :param focal, focal length
    :param R, either W / 2 or H / 2
    """
    half_angle = np.arctan(R / focal)
    return 2.0 * half_angle
62
+
63
+
64
def fov2focal(fov, R):
    """
    Focal length from field of view.
    :param fov, field of view in radians
    :param R, either W / 2 or H / 2
    """
    half_tan = np.tan(0.5 * fov)
    return R / half_tan
70
+
71
+
72
def compute_lookat_box(bb_min, bb_max, intrins):
    """
    The center and distance to a scene with bb_min, bb_max
    to place a camera with given intrinsics
    :param bb_min (3,)
    :param bb_max (3,)
    :param intrins, (fx, fy, cx, cy) of camera
    returns center (3,) tensor, cam_dist scalar
    """
    fx, fy, cx, cy = intrins
    bb_min, bb_max = torch.tensor(bb_min), torch.tensor(bb_max)
    center = 0.5 * (bb_min + bb_max)
    # bounding-box diagonal as the scene size
    size = torch.linalg.norm(bb_max - bb_min)
    # heuristic: scale distance by the focal/principal-point ratio so the box
    # roughly fills the view — NOTE(review): 0.75 factor is empirical, confirm
    cam_dist = np.sqrt(fx**2 + fy**2) / np.sqrt(cx**2 + cy**2)
    cam_dist = 0.75 * size * cam_dist
    return center, cam_dist
88
+
89
+
90
def lookat_origin(cam_dist, view_angle=-np.pi / 6):
    """
    Camera pose at distance cam_dist looking at the origin from an elevated
    angle (rotation about x).
    :param cam_dist (float)
    :param view_angle (float) elevation in radians (sign is ignored)
    returns rot (3, 3), pos (3,)
    """
    cam_dist = np.abs(cam_dist)
    view_angle = np.abs(view_angle)
    # position on the y-z arc at the given elevation
    pos = cam_dist * torch.tensor([0, np.sin(view_angle), np.cos(view_angle)])
    rot = rotx(view_angle)
    return rot, pos
100
+
101
+
102
def lookat_matrix(source_pos, target_pos, up):
    """
    IMPORTANT: USES RIGHT UP BACK XYZ CONVENTION
    :param source_pos (*, 3) camera position
    :param target_pos (*, 3) point to look at
    :param up (3,) world up vector
    returns (*, 4, 4) camera-to-world pose
    """
    *dims, _ = source_pos.shape
    up = up.reshape(*(1,) * len(dims), 3)
    up = up / torch.linalg.norm(up, dim=-1, keepdim=True)
    # build an orthonormal right/up/back frame via successive cross products
    back = normalize(target_pos - source_pos)
    right = normalize(torch.linalg.cross(up, back))
    up = normalize(torch.linalg.cross(back, right))
    R = torch.stack([right, up, back], dim=-1)
    return make_4x4_pose(R, source_pos)
117
+
118
+
119
def normalize(x):
    # unit-normalize along the last dim
    # NOTE(review): shadowed by a second `normalize` defined later in this
    # module; both compute the same value
    return x / torch.linalg.norm(x, dim=-1, keepdim=True)
121
+
122
+
123
def invert_camera(R, t):
    """
    Invert a rigid transform [R, t].
    :param R (*, 3, 3)
    :param t (*, 3)
    returns Ri (*, 3, 3), ti (*, 3) such that [Ri, ti] o [R, t] is identity
    """
    # FIX: torch.as_tensor avoids the copy/detach (and the UserWarning) that
    # torch.tensor() performs when the inputs are already tensors, and keeps
    # gradients flowing through tensor inputs
    R, t = torch.as_tensor(R), torch.as_tensor(t)
    Ri = R.transpose(-1, -2)  # inverse of a rotation is its transpose
    ti = -torch.einsum("...ij,...j->...i", Ri, t)
    return Ri, ti
133
+
134
+
135
def compose_cameras(R1, t1, R2, t2):
    """
    Compose the rigid transforms [R1, t1] and [R2, t2]
    (apply [R2, t2] first, then [R1, t1]).
    :param R1 (*, 3, 3)
    :param t1 (*, 3)
    :param R2 (*, 3, 3)
    :param t2 (*, 3)
    returns R (*, 3, 3), t (*, 3)
    """
    R_out = torch.einsum("...ij,...jk->...ik", R1, R2)
    t_out = torch.einsum("...ij,...j->...i", R1, t2) + t1
    return R_out, t_out
146
+
147
+
148
def matmul_nd(A, x):
    """
    multiply batch matrix A to batch nd tensors
    :param A (B, m, n)
    :param x (B, *dims, m)
    returns (B, *dims, m)
    """
    B, m, n = A.shape
    assert len(A) == len(x)
    # NOTE(review): the assert checks x's last dim against m, but the matmul
    # below requires it to equal n — only consistent for square matrices
    assert x.shape[-1] == m
    B, *dims, _ = x.shape
    # broadcast A over the middle dims and contract the last dim of x
    return torch.matmul(A.reshape(B, *(1,) * len(dims), m, n), x[..., None])[..., 0]
159
+
160
+
161
def view_matrix(z, up, pos):
    """
    Build a camera pose from a view direction, up hint, and position.
    :param z (*, 3) view (back) axis; up (*, 3) up hint; pos (*, 3) position
    returns (*, 4, 4) pose with columns [x, y, z, pos] over a [0, 0, 0, 1] row
    """
    *dims, _ = z.shape
    # orthonormalize: x from up x z, then re-derive y
    x = normalize(torch.linalg.cross(up, z))
    y = normalize(torch.linalg.cross(z, x))
    bottom = (
        torch.tensor([0, 0, 0, 1], dtype=torch.float32)
        .reshape(*(1,) * len(dims), 1, 4)
        .expand(*dims, 1, 4)
    )

    return torch.cat([torch.stack([x, y, z, pos], dim=-1), bottom], dim=-2)
176
+
177
+
178
def average_pose(poses):
    """
    :param poses (N, 4, 4)
    returns average pose (4, 4)
    """
    # mean position, and renormalized mean up / view axes (columns 1 and 2)
    center = poses[:, :3, 3].mean(0)
    up = normalize(poses[:, :3, 1].sum(0))
    z = normalize(poses[:, :3, 2].sum(0))
    return view_matrix(z, up, center)
187
+
188
+
189
def project_so3(M, eps=1e-4):
    """
    Project matrices onto SO(3) (nearest rotation) via SVD.
    :param M (N, *, 3, 3)
    returns (N, *, 3, 3) rotation matrices with det = +1
    """
    N, *dims, _, _ = M.shape
    # NOTE(review): this random scaling makes the function non-deterministic
    # and perturbs the projection; presumably intended to break SVD
    # degeneracies — confirm. The `eps` parameter is unused.
    M = M * (1 + torch.rand(N, *dims, 1, 3, device=M.device))
    U, D, Vt = torch.linalg.svd(M)  # (N, *, 3, 3), (N, *, 3), (N, *, 3, 3)
    # flip the last singular direction wherever U @ Vt would be a reflection
    detuvt = torch.linalg.det(torch.matmul(U, Vt))  # (N, *)
    S = torch.cat(
        [torch.ones(N, *dims, 2, device=M.device), detuvt[..., None]], dim=-1
    )  # (N, *, 3)
    return torch.matmul(U, torch.matmul(torch.diag_embed(S), Vt))
201
+
202
+
203
def make_translation(t):
    """Homogeneous 4x4 pose with identity rotation and translation t (3,)."""
    return make_4x4_pose(torch.eye(3), t)
205
+
206
+
207
def make_rotation(rx=0, ry=0, rz=0, order="xyz"):
    """
    Build a rotation-only 4x4 pose from Euler angles.
    :param rx, ry, rz rotation angles (radians) about x, y, z
    :param order axis order in which the rotations are applied
    returns (4, 4) homogeneous transform
    :raises ValueError if order is not a recognized permutation of "xyz"
    """
    Rx, Ry, Rz = rotx(rx), roty(ry), rotz(rz)
    # matrix for each application order (first axis applied right-most)
    compositions = {
        "xyz": Rz @ Ry @ Rx,
        "xzy": Ry @ Rz @ Rx,
        "yxz": Rz @ Rx @ Ry,
        "yzx": Rx @ Rz @ Ry,
        "zyx": Rx @ Ry @ Rz,
        "zxy": Ry @ Rx @ Rz,
    }
    if order not in compositions:
        # FIX: previously an unknown order fell through and crashed with
        # NameError on R; fail loudly with a clear message instead
        raise ValueError(f"unknown rotation order {order}")
    return make_4x4_pose(compositions[order], torch.zeros(3))
224
+
225
+
226
def make_4x4_pose(R, t):
    """
    Stack a rotation and translation into a homogeneous 4x4 pose.
    :param R (*, 3, 3)
    :param t (*, 3)
    return (*, 4, 4)
    """
    batch = R.shape[:-2]
    top = torch.cat([R, t.view(*batch, 3, 1)], dim=-1)  # (*, 3, 4)
    last_row = (
        torch.tensor([0, 0, 0, 1], device=R.device)
        .reshape(*(1,) * len(batch), 1, 4)
        .expand(*batch, 1, 4)
    )
    return torch.cat([top, last_row], dim=-2)
240
+
241
+
242
def normalize(x):
    # unit-normalize along the last dim
    # NOTE(review): this redefinition shadows the earlier `normalize` in this
    # module; both compute the same value
    return x / torch.sqrt(torch.sum(x**2, dim=-1, keepdim=True))
244
+
245
+
246
def rotx(theta):
    """Rotation matrix (3, 3) float32 about the x-axis by theta radians."""
    c, s = np.cos(theta), np.sin(theta)
    rows = [[1, 0, 0], [0, c, -s], [0, s, c]]
    return torch.tensor(rows, dtype=torch.float32)
255
+
256
+
257
def roty(theta):
    """Rotation matrix (3, 3) float32 about the y-axis by theta radians."""
    c, s = np.cos(theta), np.sin(theta)
    rows = [[c, 0, s], [0, 1, 0], [-s, 0, c]]
    return torch.tensor(rows, dtype=torch.float32)
266
+
267
+
268
def rotz(theta):
    """Rotation matrix (3, 3) float32 about the z-axis by theta radians."""
    c, s = np.cos(theta), np.sin(theta)
    rows = [[c, -s, 0], [s, c, 0], [0, 0, 1]]
    return torch.tensor(rows, dtype=torch.float32)
277
+
278
+
279
def relative_pose_c2w(Rwc1, Rwc2, twc1, twc2):
    """
    compute relative pose from cam 1 to cam 2 given c2w pose matrices
    :param Rwc1, Rwc2 (N, 3, 3) cam1, cam2 to world rotations
    :param twc1, twc2 (N, 3) cam1, cam2 to world translations
    returns R21 (N, 3, 3) t21 (N, 3)
    """
    twc1 = twc1.view(-1, 3, 1)
    twc2 = twc2.view(-1, 3, 1)
    # invert cam2's pose: world to c2
    Rc2w = Rwc2.transpose(-1, -2)
    tc2w = -torch.matmul(Rc2w, twc2)
    # compose: c1 -> world -> c2
    Rc2c1 = torch.matmul(Rc2w, Rwc1)
    tc2c1 = tc2w + torch.matmul(Rc2w, twc1)
    return Rc2c1, tc2c1[..., 0]
293
+
294
+
295
def relative_pose_w2c(Rc1w, Rc2w, tc1w, tc2w):
    """
    compute relative pose from cam 1 to cam 2 given w2c camera matrices
    :param Rc1w, Rc2w (N, 3, 3) world to cam1, cam2 rotations
    :param tc1w, tc2w (N, 3) world to cam1, cam2 translations
    returns Rc2c1 (N, 3, 3), tc2c1 (N, 3)
    """
    tc1w = tc1w.view(-1, 3, 1)
    tc2w = tc2w.view(-1, 3, 1)
    # we keep the world to cam transforms
    # invert cam1's matrix: c1 to world
    Rwc1 = Rc1w.transpose(-1, -2)
    twc1 = -torch.matmul(Rwc1, tc1w)
    Rc2c1 = torch.matmul(Rc2w, Rwc1)  # c1 to c2
    tc2c1 = tc2w + torch.matmul(Rc2w, twc1)
    return Rc2c1, tc2c1[..., 0]
309
+
310
+
311
def project(xyz_c, center, focal, eps=1e-5):
    """
    Pinhole-project camera-frame points to pixel coordinates.
    :param xyz_c (*, 3) 3d point in camera coordinates
    :param focal (1) focal length
    :param center (*, 2) principal point
    :param eps stabilizer added to depth to avoid division by zero
    return (*, 2)
    """
    xy = xyz_c[..., :2]
    depth = xyz_c[..., 2:3] + eps
    return center + focal * xy / depth  # (N, *, 2)
319
+
320
+
321
def convert_yup(xyz):
    """
    Convert points from x-right / y-down / z-forward to
    x-right / y-up / z-back (negate y and z).
    :param xyz (*, 3) (any trailing channels beyond 3 are dropped)
    """
    x = xyz[..., 0:1]
    y = xyz[..., 1:2]
    z = xyz[..., 2:3]
    return torch.cat((x, y.neg(), z.neg()), dim=-1)
328
+
329
+
330
def inv_project(uv, z, center, focal, yup=True):
    """
    Back-project pixels with known depth into camera-frame 3D points.
    :param uv (*, 2) pixel coordinates
    :param z (*, 1) depth along the view axis
    :param center (*, 2) principal point
    :param focal (1) focal length
    :param yup whether to return points in the y-up / z-back convention
    :returns (*, 3)
    """
    uv = uv - center
    if yup:
        # flip y and z to match the right/up/back convention
        return z * torch.cat(
            [uv[..., :1] / focal, -uv[..., 1:2] / focal, -torch.ones_like(uv[..., :1])],
            dim=-1,
        )  # (N, *, 3)

    return z * torch.cat(
        [uv / focal, torch.ones_like(uv[..., :1])],
        dim=-1,
    )  # (N, *, 3)
slahmr/slahmr/geometry/mesh.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import trimesh
4
+
5
+
6
def get_mesh_bb(mesh):
    """
    Axis-aligned bounding box of a mesh.
    :param mesh - trimesh mesh object (anything with a (V, 3) `vertices` array)
    returns bb_min (3), bb_max (3)
    """
    # BUG FIX: min and max were swapped (bb_min was vertices.max and
    # bb_max was vertices.min), producing an inverted box
    bb_min = mesh.vertices.min(axis=0)
    bb_max = mesh.vertices.max(axis=0)
    return bb_min, bb_max
14
+
15
+
16
def get_scene_bb(meshes):
    """
    Axis-aligned bounding box of a (possibly nested) collection of meshes.
    :param meshes - a trimesh.Trimesh or a (potentially nested) list of them
    returns bb_min (3), bb_max (3)
    """
    # base case: a single mesh
    if isinstance(meshes, trimesh.Trimesh):
        return get_mesh_bb(meshes)

    # recurse into sub-collections and combine the per-element boxes
    bb_mins, bb_maxs = zip(*[get_scene_bb(mesh) for mesh in meshes])
    bb_mins = np.stack(bb_mins, axis=0)
    bb_maxs = np.stack(bb_maxs, axis=0)
    return bb_mins.min(axis=0), bb_maxs.max(axis=0)
28
+
29
+
30
def make_batch_mesh(verts, faces, colors):
    """
    convenience function to make batch of meshes
    meshs have same faces in batch, verts have same color in mesh
    :param verts (B, V, 3)
    :param faces (F, 3)
    :param colors (B, 3) one color per mesh, broadcast over its vertices
    returns list of B trimesh objects
    """
    B, V, _ = verts.shape
    return [make_mesh(verts[b], faces, colors[b, None].expand(V, -1)) for b in range(B)]
40
+
41
+
42
def make_mesh(verts, faces, colors=None, yup=True):
    """
    create a trimesh object for the faces and vertices
    :param verts (V, 3) tensor
    :param faces (F, 3) tensor
    :param colors (optional) (V, 3) tensor; defaults to 50% gray
    :param yup (optional bool) whether or not to save with Y up
    """
    verts = verts.detach().cpu().numpy()
    faces = faces.detach().cpu().numpy()
    if yup:
        # negate y and z to convert to the y-up / z-back convention
        verts = np.array([1, -1, -1])[None, :] * verts
    if colors is None:
        colors = np.ones_like(verts) * 0.5
    else:
        colors = colors.detach().cpu().numpy()
    # process=False keeps the vertex order untouched (no merging/cleanup)
    return trimesh.Trimesh(
        vertices=verts, faces=faces, vertex_colors=colors, process=False
    )
61
+
62
+
63
def save_mesh_scenes(out_dir, scenes):
    """
    Save a sequence of scenes to disk; single-mesh scenes go to OBJ files,
    multi-mesh scenes to GLB files.
    :param scenes, list of scenes (list of meshes)
    """
    assert isinstance(scenes, list)
    assert isinstance(scenes[0], list)
    B = len(scenes[0])  # number of meshes per scene
    if B == 1:
        save_meshes_to_obj(out_dir, [x[0] for x in scenes])
    else:
        save_scenes_to_glb(out_dir, scenes)
74
+
75
+
76
def save_scenes_to_glb(out_dir, scenes):
    """
    Saves a list of scenes (list of meshes) each to glb files
    named scene_000.glb, scene_001.glb, ... under out_dir.
    """
    os.makedirs(out_dir, exist_ok=True)
    for t, meshes in enumerate(scenes):
        save_meshes_to_glb(f"{out_dir}/scene_{t:03d}.glb", meshes)
83
+
84
+
85
def save_meshes_to_glb(path, meshes, names=None):
    """
    put trimesh meshes in a scene and export to glb
    :param path output .glb file path
    :param meshes list of trimesh objects
    :param names (optional) node names, defaults to mesh_000, mesh_001, ...
    """
    if names is not None:
        assert len(meshes) == len(names)

    scene = trimesh.Scene()
    for i, mesh in enumerate(meshes):
        name = f"mesh_{i:03d}" if names is None else names[i]
        scene.add_geometry(mesh, node_name=name)

    # binary glTF export
    with open(path, "wb") as f:
        f.write(trimesh.exchange.gltf.export_glb(scene, include_normals=True))
99
+
100
+
101
def save_meshes_to_obj(out_dir, meshes, names=None):
    """
    Export each mesh to its own OBJ file under out_dir (colors are dropped).
    :param names (optional) per-mesh file stems, default mesh_000, mesh_001, ...
    """
    if names is not None:
        assert len(meshes) == len(names)

    os.makedirs(out_dir, exist_ok=True)
    for i, mesh in enumerate(meshes):
        name = f"mesh_{i:03d}" if names is None else names[i]
        path = os.path.join(out_dir, f"{name}.obj")
        with open(path, "w") as f:
            mesh.export(f, file_type="obj", include_color=False, include_normals=True)
slahmr/slahmr/geometry/pcl.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
+
5
def read_pcl_tensor(path):
    """
    Load a point cloud from `path` and return it as a torch tensor.
    NOTE(review): `read_pcl` is neither defined nor imported in this module,
    so calling this raises NameError — confirm where it should come from.
    """
    pcl_np = read_pcl(path)
    return torch.from_numpy(pcl_np)
8
+
9
+
10
def align_pcl(Y, X, weight=None, fixed_scale=False):
    """align similarity transform to align X with Y using umeyama method
    X' = s * R * X + t is aligned with Y
    :param Y (*, N, 3) first trajectory
    :param X (*, N, 3) second trajectory
    :param weight (*, N, 1) optional weight of valid correspondences
    :param fixed_scale if True, force s = 1 (rigid alignment)
    :returns s (*, 1), R (*, 3, 3), t (*, 3)
    """
    *dims, N, _ = Y.shape
    # effective point count per batch element (replaced by weight sums below)
    # NOTE(review): created on the default device — presumably inputs are CPU
    # tensors here; confirm before calling with CUDA tensors
    N = torch.ones(*dims, 1, 1) * N

    if weight is not None:
        Y = Y * weight
        X = X * weight
        N = weight.sum(dim=-2, keepdim=True)  # (*, 1, 1)

    # subtract mean
    my = Y.sum(dim=-2) / N[..., 0]  # (*, 3)
    mx = X.sum(dim=-2) / N[..., 0]
    y0 = Y - my[..., None, :]  # (*, N, 3)
    x0 = X - mx[..., None, :]

    if weight is not None:
        y0 = y0 * weight
        x0 = x0 * weight

    # correlation
    C = torch.matmul(y0.transpose(-1, -2), x0) / N  # (*, 3, 3)
    U, D, Vh = torch.linalg.svd(C)  # (*, 3, 3), (*, 3), (*, 3, 3)

    # sign correction so R is a proper rotation (det = +1), not a reflection
    S = torch.eye(3).reshape(*(1,) * (len(dims)), 3, 3).repeat(*dims, 1, 1)
    neg = torch.det(U) * torch.det(Vh.transpose(-1, -2)) < 0
    S[neg, 2, 2] = -1

    R = torch.matmul(U, torch.matmul(S, Vh))  # (*, 3, 3)

    D = torch.diag_embed(D)  # (*, 3, 3)
    if fixed_scale:
        s = torch.ones(*dims, 1, device=Y.device, dtype=torch.float32)
    else:
        # optimal scale: trace(D S) / var(x0)
        var = torch.sum(torch.square(x0), dim=(-1, -2), keepdim=True) / N  # (*, 1, 1)
        s = (
            torch.diagonal(torch.matmul(D, S), dim1=-2, dim2=-1).sum(
                dim=-1, keepdim=True
            )
            / var[..., 0]
        )  # (*, 1)

    t = my - s * torch.matmul(R, mx[..., None])[..., 0]  # (*, 3)

    return s, R, t
slahmr/slahmr/geometry/plane.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+
4
def fit_plane(points):
    """
    Least-squares plane fit via SVD.
    :param points (*, N, 3)
    returns (*, 4) plane parameters (nx, ny, nz, d) where (nx, ny, nz) is the
        unit normal and d the offset, with n . p = d for points p on the plane
    """
    *dims, N, D = points.shape
    mean = points.mean(dim=-2, keepdim=True)
    # the normal is the direction of least variance: last right-singular vector
    # (*, N, D), (*, D), (*, D, D)
    U, S, Vh = torch.linalg.svd(points - mean)
    normal = Vh[..., -1, :]  # (*, D)
    # offset = mean projection of the points onto the normal
    offset = torch.einsum("...ij,...j->...i", points, normal)  # (*, N)
    offset = offset.mean(dim=-1, keepdim=True)
    return torch.cat([normal, offset], dim=-1)
17
+
18
+
19
def get_plane_transform(up, ground_plane=None, xyz_orig=None):
    """
    get R, t rigid transform from plane and desired origin
    :param up (3,) up vector of coordinate frame
    :param ground_plane (4) (a, b, c, d) where a,b,c is the normal
    :param xyz_orig (3) desired origin
    returns R (3, 3), t (3)
    """
    R = torch.eye(3)
    t = torch.zeros(3)
    if ground_plane is None:
        return R, t

    # compute transform between world up vector and passed in floor
    ground_plane = torch.as_tensor(ground_plane)
    # canonicalize the sign so d >= 0
    ground_plane = torch.sign(ground_plane[3]) * ground_plane

    # Rodrigues-style rotation taking `up` onto the plane normal
    # NOTE(review): degenerate when up is parallel to the normal
    # (ang_sin == 0 divides by zero) — confirm callers avoid that case
    normal = ground_plane[:3]
    normal = normal / torch.linalg.norm(normal)
    v = torch.linalg.cross(up, normal)
    ang_sin = torch.linalg.norm(v)
    ang_cos = up.dot(normal)
    skew_v = torch.as_tensor([[0.0, -v[2], v[1]], [v[2], 0.0, -v[0]], [-v[1], v[0], 0.0]])
    R = torch.eye(3) + skew_v + (skew_v @ skew_v) * ((1.0 - ang_cos) / (ang_sin**2))

    # project origin onto plane
    if xyz_orig is None:
        xyz_orig = torch.zeros(3)
    t, _ = compute_plane_intersection(xyz_orig, -normal, ground_plane)

    return R, t
49
+
50
+
51
def parse_floor_plane(floor_plane):
    """
    Takes floor plane in the optimization form (Bx3 with a,b,c * d) and parses into
    (a,b,c,d) from with (a,b,c) normal facing "up in the camera frame and d the offset.
    """
    # the 3-vector encodes normal * offset, so its norm is the offset
    offset = torch.norm(floor_plane, dim=-1, keepdim=True)
    normal = floor_plane / offset

    # in camera system -y is up, so floor plane normal y component should never be positive
    # (assuming the camera is not sideways or upside down)
    flip = normal[..., 1:2] > 0.0
    normal = torch.where(flip.expand_as(normal), normal.neg(), normal)
    offset = torch.where(flip, offset.neg(), offset)

    return torch.cat([normal, offset], dim=-1)
69
+
70
+
71
def compute_plane_intersection(point, direction, plane):
    """
    Given a ray defined by a point in space and a direction,
    compute the intersection point with the given plane.
    Detect intersection in either direction or -direction.
    Note, ray may not actually intersect with the plane.

    Returns the intersection point and s where
    point + s * direction = intersection_point. if s < 0 it means
    -direction intersects.

    - point : B x 3
    - direction : B x 3
    - plane : B x 4 (a, b, c, d) where (a, b, c) is the normal and (d) the offset.
    """
    dims = point.shape[:-1]  # NOTE(review): unused — confirm it can be dropped
    plane_normal = plane[..., :3]
    plane_off = plane[..., 3]
    # ray parameter at which n . (point + s * direction) = d; the small
    # epsilon guards against rays parallel to the plane
    s = (plane_off - bdot(plane_normal, point)) / (bdot(plane_normal, direction) + 1e-4)
    itsct_pt = point + s.reshape((-1, 1)) * direction
    return itsct_pt, s
92
+
93
+
94
def bdot(A1, A2, keepdim=False):
    """
    Batched dot product along the last dimension.
    - A1 : B x D
    - A2 : B x D.
    Returns B (or B x 1 when keepdim=True).
    """
    prod = torch.mul(A1, A2)
    return prod.sum(dim=-1, keepdim=keepdim)
slahmr/slahmr/geometry/rotation.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from torch.nn import functional as F
4
+
5
+
6
def batch_rodrigues(rot_vecs, epsilon=1e-8, dtype=torch.float32):
    """
    Taken from https://github.com/mkocabas/VIBE/blob/master/lib/utils/geometry.py
    Rodrigues formula: rotation matrices from a batch of axis-angle vectors.
    - param rot_vecs: torch.tensor (N, 3) array of N axis-angle vectors
    - returns R: torch.tensor (N, 3, 3) rotation matrices
    """
    N = rot_vecs.shape[0]
    device = rot_vecs.device

    # small constant keeps the norm (and the division below) finite at zero
    angle = torch.norm(rot_vecs + 1e-8, dim=1, keepdim=True)  # (N, 1)
    axis = rot_vecs / angle

    cos = torch.cos(angle)[:, None]  # (N, 1, 1)
    sin = torch.sin(angle)[:, None]

    # skew-symmetric cross-product matrix of the (unit) axis
    ax, ay, az = torch.split(axis, 1, dim=1)  # each (N, 1)
    zero = torch.zeros((N, 1), dtype=dtype, device=device)
    K = torch.cat(
        [zero, -az, ay, az, zero, -ax, -ay, ax, zero], dim=1
    ).view(N, 3, 3)

    # R = I + sin(theta) K + (1 - cos(theta)) K^2
    eye = torch.eye(3, dtype=dtype, device=device)[None]
    return eye + sin * K + (1 - cos) * torch.bmm(K, K)
34
+
35
+
36
def quaternion_mul(q0, q1):
    """
    Hamilton product of two quaternions.
    EXPECTS WXYZ
    :param q0 (*, 4)
    :param q1 (*, 4)
    """
    w0, v0 = q0[..., :1], q0[..., 1:]
    w1, v1 = q1[..., :1], q1[..., 1:]
    w = w0 * w1 - (v0 * v1).sum(dim=-1, keepdim=True)
    xyz = w0 * v1 + w1 * v0 + torch.linalg.cross(v0, v1)
    return torch.cat([w, xyz], dim=-1)
47
+
48
+
49
def quaternion_inverse(q, eps=1e-8):
    """
    Quaternion inverse: conjugate divided by the squared magnitude.
    EXPECTS WXYZ
    :param q (*, 4)
    :param eps stabilizer for near-zero quaternions
    """
    w, v = q[..., :1], q[..., 1:]
    conjugate = torch.cat([w, -v], dim=-1)
    sq_mag = (q * q).sum(dim=-1, keepdim=True) + eps
    return conjugate / sq_mag
57
+
58
+
59
def quaternion_slerp(t, q0, q1, eps=1e-8):
    """
    Spherical linear interpolation between quaternions.
    :param t (*, 1) must be between 0 and 1
    :param q0 (*, 4)
    :param q1 (*, 4)
    """
    dims = q0.shape[:-1]
    t = t.view(*dims, 1)

    q0 = F.normalize(q0, p=2, dim=-1)
    q1 = F.normalize(q1, p=2, dim=-1)
    dot = (q0 * q1).sum(dim=-1, keepdim=True)

    # make sure we give the shortest rotation path (< 180d)
    neg = dot < 0
    q1 = torch.where(neg, -q1, q1)
    dot = torch.where(neg, -dot, dot)
    angle = torch.acos(dot)

    # if angle is too small, just do linear interpolation
    collin = torch.abs(dot) > 1 - eps
    fac = 1 / torch.sin(angle)
    w0 = torch.where(collin, 1 - t, torch.sin((1 - t) * angle) * fac)
    w1 = torch.where(collin, t, torch.sin(t * angle) * fac)
    slerp = q0 * w0 + q1 * w1
    return slerp
85
+
86
+
87
def rotation_matrix_to_angle_axis(rotation_matrix):
    """
    This function is borrowed from https://github.com/kornia/kornia

    Convert rotation matrix to Rodrigues vector (via quaternions).
    NaNs from degenerate inputs are zeroed out.
    """
    quaternion = rotation_matrix_to_quaternion(rotation_matrix)
    aa = quaternion_to_angle_axis(quaternion)
    aa[torch.isnan(aa)] = 0.0
    return aa
97
+
98
+
99
def quaternion_to_angle_axis(quaternion):
    """
    This function is borrowed from https://github.com/kornia/kornia

    Convert quaternion vector to angle axis of rotation.
    Adapted from ceres C++ library: ceres-solver/include/ceres/rotation.h

    :param quaternion (*, 4) expects WXYZ
    :returns angle_axis (*, 3)
    """
    # unpack input and compute conversion
    q1 = quaternion[..., 1]
    q2 = quaternion[..., 2]
    q3 = quaternion[..., 3]
    sin_squared_theta = q1 * q1 + q2 * q2 + q3 * q3

    sin_theta = torch.sqrt(sin_squared_theta)
    cos_theta = quaternion[..., 0]
    # atan2 branch keeps the angle in the right quadrant for negative w
    two_theta = 2.0 * torch.where(
        cos_theta < 0.0,
        torch.atan2(-sin_theta, -cos_theta),
        torch.atan2(sin_theta, cos_theta),
    )

    # k = theta / sin(theta/2); near zero rotation use the limit value 2
    k_pos = two_theta / sin_theta
    k_neg = 2.0 * torch.ones_like(sin_theta)
    k = torch.where(sin_squared_theta > 0.0, k_pos, k_neg)

    angle_axis = torch.zeros_like(quaternion)[..., :3]
    angle_axis[..., 0] += q1 * k
    angle_axis[..., 1] += q2 * k
    angle_axis[..., 2] += q3 * k
    return angle_axis
132
+
133
+
134
def quaternion_to_rotation_matrix(quaternion):
    """
    Convert a quaternion to a rotation matrix.
    Taken from https://github.com/kornia/kornia, based on
    https://github.com/matthew-brett/transforms3d/blob/8965c48401d9e8e66b6a8c37c65f2fc200a076fa/transforms3d/quaternions.py#L101
    https://github.com/tensorflow/graphics/blob/master/tensorflow_graphics/geometry/transformation/rotation_matrix_3d.py#L247
    :param quaternion (N, 4) expects WXYZ order
    returns rotation matrix (N, 3, 3)
    """
    # normalize the input quaternion
    quaternion_norm = F.normalize(quaternion, p=2, dim=-1, eps=1e-12)
    *dims, _ = quaternion_norm.shape

    # unpack the normalized quaternion components
    w, x, y, z = torch.chunk(quaternion_norm, chunks=4, dim=-1)

    # compute the actual conversion (standard quaternion-to-matrix products)
    tx = 2.0 * x
    ty = 2.0 * y
    tz = 2.0 * z
    twx = tx * w
    twy = ty * w
    twz = tz * w
    txx = tx * x
    txy = ty * x
    txz = tz * x
    tyy = ty * y
    tyz = tz * y
    tzz = tz * z
    one = torch.tensor(1.0)

    # assemble the 9 matrix entries in row-major order
    matrix = torch.stack(
        (
            one - (tyy + tzz),
            txy - twz,
            txz + twy,
            txy + twz,
            one - (txx + tzz),
            tyz - twx,
            txz - twy,
            tyz + twx,
            one - (txx + tyy),
        ),
        dim=-1,
    ).view(*dims, 3, 3)
    return matrix
180
+
181
+
182
def angle_axis_to_quaternion(angle_axis):
    """
    This function is borrowed from https://github.com/kornia/kornia
    Convert angle axis to quaternion in WXYZ order
    :param angle_axis (*, 3)
    :returns quaternion (*, 4) WXYZ order
    """
    theta_sq = (angle_axis * angle_axis).sum(dim=-1, keepdim=True)  # (*, 1)
    theta = torch.sqrt(theta_sq)
    half = 0.5 * theta
    ones = torch.ones_like(half)
    # need to handle the zero rotation case:
    # sin(a x) / x -> a as x -> 0, so the limit of sin(theta/2)/theta is 0.5
    nonzero = theta_sq > 0
    scale = torch.where(nonzero, torch.sin(half) / theta, 0.5 * ones)
    w = torch.where(nonzero, torch.cos(half), ones)
    return torch.cat([w, scale * angle_axis], dim=-1)
200
+
201
+
202
def rotation_matrix_to_quaternion(rotation_matrix, eps=1e-6):
    """
    This function is borrowed from https://github.com/kornia/kornia
    Convert rotation matrix to 4d quaternion vector
    This algorithm is based on algorithm described in
    https://github.com/KieranWynn/pyquaternion/blob/master/pyquaternion/quaternion.py#L201

    Four numerically-stable branches are computed (one per dominant diagonal
    pattern) and blended with mutually exclusive masks.
    :param rotation_matrix (N, 3, 3)
    returns quaternion (*, 4) in WXYZ order
    """
    *dims, m, n = rotation_matrix.shape
    rmat_t = torch.transpose(rotation_matrix.reshape(-1, m, n), -1, -2)

    # branch selectors from the diagonal entries
    mask_d2 = rmat_t[:, 2, 2] < eps

    mask_d0_d1 = rmat_t[:, 0, 0] > rmat_t[:, 1, 1]
    mask_d0_nd1 = rmat_t[:, 0, 0] < -rmat_t[:, 1, 1]

    # candidate quaternion when x is the dominant component
    t0 = 1 + rmat_t[:, 0, 0] - rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
    q0 = torch.stack(
        [
            rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
            t0,
            rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
            rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
        ],
        -1,
    )
    t0_rep = t0.repeat(4, 1).t()

    # candidate when y is dominant
    t1 = 1 - rmat_t[:, 0, 0] + rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
    q1 = torch.stack(
        [
            rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
            rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
            t1,
            rmat_t[:, 1, 2] + rmat_t[:, 2, 1],
        ],
        -1,
    )
    t1_rep = t1.repeat(4, 1).t()

    # candidate when z is dominant
    t2 = 1 - rmat_t[:, 0, 0] - rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
    q2 = torch.stack(
        [
            rmat_t[:, 0, 1] - rmat_t[:, 1, 0],
            rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
            rmat_t[:, 1, 2] + rmat_t[:, 2, 1],
            t2,
        ],
        -1,
    )
    t2_rep = t2.repeat(4, 1).t()

    # candidate when w is dominant (trace is large)
    t3 = 1 + rmat_t[:, 0, 0] + rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
    q3 = torch.stack(
        [
            t3,
            rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
            rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
            rmat_t[:, 0, 1] - rmat_t[:, 1, 0],
        ],
        -1,
    )
    t3_rep = t3.repeat(4, 1).t()

    # mutually exclusive masks select exactly one candidate per matrix
    mask_c0 = mask_d2 * mask_d0_d1
    mask_c1 = mask_d2 * ~mask_d0_d1
    mask_c2 = ~mask_d2 * mask_d0_nd1
    mask_c3 = ~mask_d2 * ~mask_d0_nd1
    mask_c0 = mask_c0.view(-1, 1).type_as(q0)
    mask_c1 = mask_c1.view(-1, 1).type_as(q1)
    mask_c2 = mask_c2.view(-1, 1).type_as(q2)
    mask_c3 = mask_c3.view(-1, 1).type_as(q3)

    q = q0 * mask_c0 + q1 * mask_c1 + q2 * mask_c2 + q3 * mask_c3
    # normalize by the selected branch's magnitude term
    q /= torch.sqrt(
        t0_rep * mask_c0
        + t1_rep * mask_c1
        + t2_rep * mask_c2  # noqa
        + t3_rep * mask_c3
    )  # noqa
    q *= 0.5
    return q.reshape(*dims, 4)
slahmr/slahmr/humor/__init__.py ADDED
File without changes
slahmr/slahmr/humor/amass_utils.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Taken from https://github.com/davrempe/humor
3
+ """
4
+
5
+ from body_model.utils import SMPL_JOINTS
6
+
7
+
8
+ TRAIN_DATASETS = [
9
+ "CMU",
10
+ "MPI_Limits",
11
+ "TotalCapture",
12
+ "Eyes_Japan_Dataset",
13
+ "KIT",
14
+ "BioMotionLab_NTroje",
15
+ "BMLmovi",
16
+ "EKUT",
17
+ "ACCAD",
18
+ ]
19
+ TEST_DATASETS = ["Transitions_mocap", "HumanEva"]
20
+ VAL_DATASETS = ["MPI_HDM05", "SFU", "MPI_mosh"]
21
+
22
+
23
+ SPLITS = ["train", "val", "test", "custom"]
24
+ SPLIT_BY = [
25
+ "single", # the data path is a single .npz file. Don't split: train and test are same
26
+ "sequence", # the data paths are directories of subjects. Collate and split by sequence.
27
+ "subject", # the data paths are directories of datasets. Collate and split by subject.
28
+ "dataset", # a single data path to the amass data root is given. The predefined datasets will be used for each split.
29
+ ]
30
+
31
+ ROT_REPS = ["mat", "aa", "6d"]
32
+
33
+ # these correspond to [root, left knee, right knee, left heel, right heel, left toe, right toe, left hand, right hand]
34
+ CONTACT_ORDERING = [
35
+ "hips",
36
+ "leftLeg",
37
+ "rightLeg",
38
+ "leftFoot",
39
+ "rightFoot",
40
+ "leftToeBase",
41
+ "rightToeBase",
42
+ "leftHand",
43
+ "rightHand",
44
+ ]
45
+ CONTACT_INDS = [SMPL_JOINTS[jname] for jname in CONTACT_ORDERING]
46
+
47
+ NUM_BODY_JOINTS = len(SMPL_JOINTS) - 1
48
+ NUM_KEYPT_VERTS = 43
49
+
50
+ DATA_NAMES = [
51
+ "trans",
52
+ "trans_vel",
53
+ "root_orient",
54
+ "root_orient_vel",
55
+ "pose_body",
56
+ "pose_body_vel",
57
+ "joints",
58
+ "joints_vel",
59
+ "joints_orient_vel",
60
+ "verts",
61
+ "verts_vel",
62
+ "contacts",
63
+ ]
64
+
65
+ SMPL_JOINTS_RETURN_CONFIG = {
66
+ "trans": True,
67
+ "trans_vel": True,
68
+ "root_orient": True,
69
+ "root_orient_vel": True,
70
+ "pose_body": True,
71
+ "pose_body_vel": False,
72
+ "joints": True,
73
+ "joints_vel": True,
74
+ "joints_orient_vel": False,
75
+ "verts": False,
76
+ "verts_vel": False,
77
+ "contacts": False,
78
+ }
79
+
80
+ SMPL_JOINTS_CONTACTS_RETURN_CONFIG = {
81
+ "trans": True,
82
+ "trans_vel": True,
83
+ "root_orient": True,
84
+ "root_orient_vel": True,
85
+ "pose_body": True,
86
+ "pose_body_vel": False,
87
+ "joints": True,
88
+ "joints_vel": True,
89
+ "joints_orient_vel": False,
90
+ "verts": False,
91
+ "verts_vel": False,
92
+ "contacts": True,
93
+ }
94
+
95
+ ALL_RETURN_CONFIG = {
96
+ "trans": True,
97
+ "trans_vel": True,
98
+ "root_orient": True,
99
+ "root_orient_vel": True,
100
+ "pose_body": True,
101
+ "pose_body_vel": False,
102
+ "joints": True,
103
+ "joints_vel": True,
104
+ "joints_orient_vel": False,
105
+ "verts": True,
106
+ "verts_vel": False,
107
+ "contacts": True,
108
+ }
109
+
110
+ RETURN_CONFIGS = {
111
+ "smpl+joints+contacts": SMPL_JOINTS_CONTACTS_RETURN_CONFIG,
112
+ "smpl+joints": SMPL_JOINTS_RETURN_CONFIG,
113
+ "all": ALL_RETURN_CONFIG,
114
+ }
115
+
116
+
117
def data_name_list(return_config):
    """Return the data-stream names enabled in the given return configuration."""
    enabled = RETURN_CONFIGS[return_config]
    names = []
    for name in DATA_NAMES:
        if enabled[name]:
            names.append(name)
    return names
124
+
125
+
126
def data_dim(dname, rot_rep_size=9):
    """
    Return the flattened feature dimension of the data stream ``dname``.

    Rotation-valued streams ("root_orient", "pose_body") use ``rot_rep_size``
    per joint (9 for matrices, 6 for the 6d representation, 3 for axis-angle).

    Raises:
        ValueError: if ``dname`` is not a known data name.
    """
    if dname in ["trans", "trans_vel", "root_orient_vel"]:
        return 3
    elif dname in ["root_orient"]:
        return rot_rep_size
    elif dname in ["pose_body"]:
        return NUM_BODY_JOINTS * rot_rep_size
    elif dname in ["pose_body_vel"]:
        return NUM_BODY_JOINTS * 3
    elif dname in ["joints", "joints_vel"]:
        return len(SMPL_JOINTS) * 3
    elif dname in ["joints_orient_vel"]:
        return 1
    elif dname in ["verts", "verts_vel"]:
        return NUM_KEYPT_VERTS * 3
    elif dname in ["contacts"]:
        return len(CONTACT_ORDERING)
    else:
        # Raise instead of the original print + exit() so callers can catch the
        # error (and see a stack trace) rather than having the process killed.
        raise ValueError("The given data name %s is not valid!" % (dname))
slahmr/slahmr/humor/humor_model.py ADDED
@@ -0,0 +1,1655 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Taken from https://github.com/davrempe/humor
3
+ """
4
+
5
+ import time, os
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ from torch.distributions.normal import Normal
11
+
12
+ from .amass_utils import data_name_list, data_dim
13
+ from .transforms import (
14
+ convert_to_rotmat,
15
+ compute_world2aligned_mat,
16
+ rotation_matrix_to_angle_axis,
17
+ )
18
+
19
+ from body_model.specs import SMPL_JOINTS, SMPLH_PATH
20
+ from body_model.body_model import BodyModel
21
+
22
+
23
+ IN_ROT_REPS = ["aa", "6d", "mat"]
24
+ OUT_ROT_REPS = ["aa", "6d", "9d"]
25
+ ROT_REP_SIZE = {"aa": 3, "6d": 6, "mat": 9, "9d": 9}
26
+ NUM_SMPL_JOINTS = len(SMPL_JOINTS)
27
+ NUM_BODY_JOINTS = NUM_SMPL_JOINTS - 1 # no root
28
+ BETA_SIZE = 16
29
+
30
+ POSTERIOR_OPTIONS = ["mlp"]
31
+ PRIOR_OPTIONS = ["mlp"]
32
+ DECODER_OPTIONS = ["mlp"]
33
+
34
+ WORLD2ALIGN_NAME_CACHE = {
35
+ "root_orient": None,
36
+ "trans": None,
37
+ "joints": None,
38
+ "verts": None,
39
+ "joints_vel": None,
40
+ "verts_vel": None,
41
+ "trans_vel": None,
42
+ "root_orient_vel": None,
43
+ }
44
+
45
+
46
def step(
    model, loss_func, data, dataset, device, cur_epoch, mode="train", use_gt_p=1.0
):
    """
    Given data for the current training step (batch),
    pulls out the necessary needed data,
    runs the model,
    calculates and returns the loss.

    - use_gt_p : the probability of using ground truth as input to each step rather than the model's own prediction
                 (1.0 is fully supervised, 0.0 is fully autoregressive)

    Returns (loss, stats_dict) from ``loss_func``.

    NOTE(review): ``dataset`` and ``mode`` are accepted but never used in this
    body — presumably kept for a shared training-loop signature; confirm
    against the caller before removing.
    """
    use_sched_samp = use_gt_p < 1.0
    batch_in, batch_out, meta = data

    # global (world-frame) GT is only needed when rolling out (scheduled sampling)
    prep_data = model.prepare_input(
        batch_in,
        device,
        data_out=batch_out,
        return_input_dict=True,
        return_global_dict=use_sched_samp,
    )
    if use_sched_samp:
        x_past, x_t, gt_dict, input_dict, global_gt_dict = prep_data
    else:
        x_past, x_t, gt_dict, input_dict = prep_data

    B, T, S_in, _ = x_past.size()
    S_out = x_t.size(2)

    if not use_sched_samp:
        # fully supervised phase
        # start by using gt at every step, so just form all steps from all sequences into one large batch
        # and get per-step predictions
        x_past_batched = x_past.reshape((B * T, S_in, -1))
        x_t_batched = x_t.reshape((B * T, S_out, -1))
        out_dict = model(x_past_batched, x_t_batched)
    else:
        # in scheduled sampling or fully autoregressive phase
        init_input_dict = dict()
        for k in input_dict.keys():
            init_input_dict[k] = input_dict[k][
                :, 0, :, :
            ]  # only need first step for init
        # this out_dict is the global state
        sched_samp_out = model.scheduled_sampling(
            x_past,
            x_t,
            init_input_dict,
            p=use_gt_p,
            gender=meta["gender"],
            betas=meta["betas"].to(device),
            need_global_out=(not model.detach_sched_samp),
        )
        if model.detach_sched_samp:
            out_dict = sched_samp_out
        else:
            out_dict, _ = sched_samp_out
        # gt must be global state for supervision in this case
        if not model.detach_sched_samp:
            print("USING global supervision")
            gt_dict = global_gt_dict

    # loss can be computed per output step in parallel
    # batch dicts accordingly (distribution entries are (mean, var) pairs,
    # everything else is a tensor; both are flattened in place)
    for k in out_dict.keys():
        if k == "posterior_distrib" or k == "prior_distrib":
            m, v = out_dict[k]
            m = m.reshape((B * T, -1))
            v = v.reshape((B * T, -1))
            out_dict[k] = (m, v)
        else:
            out_dict[k] = out_dict[k].reshape((B * T * S_out, -1))
    for k in gt_dict.keys():
        gt_dict[k] = gt_dict[k].reshape((B * T * S_out, -1))

    # replicate per-sequence gender/betas metadata to one entry per output step
    gender_in = np.broadcast_to(
        np.array(meta["gender"]).reshape((B, 1, 1, 1)), (B, T, S_out, 1)
    )
    gender_in = gender_in.reshape((B * T * S_out, 1))
    betas_in = meta["betas"].reshape((B, T, 1, -1)).expand((B, T, S_out, 16)).to(device)
    betas_in = betas_in.reshape((B * T * S_out, 16))
    loss, stats_dict = loss_func(
        out_dict, gt_dict, cur_epoch, gender=gender_in, betas=betas_in
    )

    return loss, stats_dict
133
+
134
+
135
+ class HumorModel(nn.Module):
136
    def __init__(
        self,
        in_rot_rep="aa",
        out_rot_rep="aa",
        latent_size=48,
        steps_in=1,
        conditional_prior=True,  # use a learned prior rather than standard normal
        output_delta=True,  # output change in state from decoder rather than next step directly
        posterior_arch="mlp",
        decoder_arch="mlp",
        prior_arch="mlp",
        model_data_config="smpl+joints+contacts",
        detach_sched_samp=True,  # if true, detaches outputs of previous step so gradients don't flow through many steps
        model_use_smpl_joint_inputs=False,  # if true, uses smpl joints rather than regressed joints to input at next step (during rollout and sched samp)
        model_smpl_batch_size=1,  # if using smpl joint inputs this should be batch_size of the smpl model (aka data input to rollout)
    ):
        """
        Build the HuMoR CVAE: a posterior encoder, a decoder, and (optionally)
        a learned conditional prior, sized from the chosen data configuration
        and rotation representations. Only single-step output is supported.
        """
        super(HumorModel, self).__init__()
        self.ignore_keys = []

        self.steps_in = steps_in
        self.steps_out = 1
        self.out_step_size = 1
        self.detach_sched_samp = detach_sched_samp
        self.output_delta = output_delta

        if self.steps_out > 1:
            raise NotImplementedError("Only supported single step output currently.")

        # validate rotation representations and architecture choices up front
        if out_rot_rep not in OUT_ROT_REPS:
            raise Exception(
                "Not a valid output rotation representation: %s" % (out_rot_rep)
            )
        if in_rot_rep not in IN_ROT_REPS:
            raise Exception(
                "Not a valid input rotation representation: %s" % (in_rot_rep)
            )
        self.out_rot_rep = out_rot_rep
        self.in_rot_rep = in_rot_rep

        if posterior_arch not in POSTERIOR_OPTIONS:
            raise Exception("Not a valid encoder architecture: %s" % (posterior_arch))
        if decoder_arch not in DECODER_OPTIONS:
            raise Exception("Not a valid decoder architecture: %s" % (decoder_arch))
        if conditional_prior and prior_arch not in PRIOR_OPTIONS:
            raise Exception("Not a valid prior architecture: %s" % (prior_arch))
        self.posterior_arch = posterior_arch
        self.decoder_arch = decoder_arch
        self.prior_arch = prior_arch

        # get the list of data names for this config
        self.data_names = data_name_list(model_data_config)
        self.aux_in_data_names = (
            self.aux_out_data_names
        ) = None  # auxiliary data will be returned as part of the input/output dictionary, but not the actual network input/output tensor
        self.pred_contacts = False
        if (
            model_data_config.find("contacts") >= 0
        ):  # network is outputting contact classification as well and need to supervise, but not given as input to net.
            self.data_names.remove("contacts")
            self.aux_out_data_names = ["contacts"]
            self.pred_contacts = True

        # joints/verts are stored root-relative, so a trans->joint offset is needed
        self.need_trans2joint = (
            "joints" in self.data_names or "verts" in self.data_names
        )
        self.model_data_config = model_data_config

        # per-stream input dimensions under the input rotation representation
        self.input_rot_dim = ROT_REP_SIZE[self.in_rot_rep]
        self.input_dim_list = [
            data_dim(dname, rot_rep_size=self.input_rot_dim)
            for dname in self.data_names
        ]
        self.input_data_dim = sum(self.input_dim_list)

        # per-stream output dimensions; the "delta" list always uses matrices
        # because residual rotations are composed as rotation matrices
        self.output_rot_dim = ROT_REP_SIZE[self.out_rot_rep]
        self.output_dim_list = [
            data_dim(dname, rot_rep_size=self.output_rot_dim)
            for dname in self.data_names
        ]
        self.delta_output_dim_list = [
            data_dim(dname, rot_rep_size=ROT_REP_SIZE["mat"])
            for dname in self.data_names
        ]

        if self.pred_contacts:
            # account for contact classification output
            self.output_dim_list.append(data_dim("contacts"))
            self.delta_output_dim_list.append(data_dim("contacts"))

        self.output_data_dim = sum(self.output_dim_list)

        self.latent_size = latent_size
        past_data_dim = self.steps_in * self.input_data_dim
        t_data_dim = self.steps_out * self.input_data_dim

        # posterior encoder (given past and future, predict latent transition distribution)
        print("Using posterior architecture: %s" % (self.posterior_arch))
        if self.posterior_arch == "mlp":
            layer_list = [
                past_data_dim + t_data_dim,
                1024,
                1024,
                1024,
                1024,
                self.latent_size * 2,
            ]
            self.encoder = MLP(
                layers=layer_list,  # mu and sigma output
                nonlinearity=nn.ReLU,
                use_gn=True,
            )

        # decoder (given past and latent transition, predict future) for the immediate next step
        print("Using decoder architecture: %s" % (self.decoder_arch))
        decoder_input_dim = past_data_dim + self.latent_size
        if self.decoder_arch == "mlp":
            layer_list = [decoder_input_dim, 1024, 1024, 512, self.output_data_dim]
            self.decoder = MLP(
                layers=layer_list,
                nonlinearity=nn.ReLU,
                use_gn=True,
                skip_input_idx=past_data_dim,  # skip connect the latent to every layer
            )

        # prior (if conditional, given past predict latent transition distribution)
        self.use_conditional_prior = conditional_prior
        if self.use_conditional_prior:
            print("Using prior architecture: %s" % (self.prior_arch))
            layer_list = [past_data_dim, 1024, 1024, 1024, 1024, self.latent_size * 2]
            self.prior_net = MLP(
                layers=layer_list,  # mu and sigma output
                nonlinearity=nn.ReLU,
                use_gn=True,
            )
        else:
            print("Using standard normal prior.")

        self.use_smpl_joint_inputs = model_use_smpl_joint_inputs
        self.smpl_batch_size = model_smpl_batch_size
        if self.use_smpl_joint_inputs:
            # need a body model to compute the joints after each step.
            print(
                "Using SMPL joints rather than regressed joints as input at each step for roll out and scheduled sampling..."
            )
            # one frozen SMPL-H body model per gender; selected at runtime by name
            male_bm_path = os.path.join(SMPLH_PATH, "male/model.npz")
            self.male_bm = BodyModel(
                bm_path=male_bm_path, num_betas=16, batch_size=self.smpl_batch_size
            )
            female_bm_path = os.path.join(SMPLH_PATH, "female/model.npz")
            self.female_bm = BodyModel(
                bm_path=female_bm_path, num_betas=16, batch_size=self.smpl_batch_size
            )
            neutral_bm_path = os.path.join(SMPLH_PATH, "neutral/model.npz")
            self.neutral_bm = BodyModel(
                bm_path=neutral_bm_path, num_betas=16, batch_size=self.smpl_batch_size
            )
            self.bm_dict = {
                "male": self.male_bm,
                "female": self.female_bm,
                "neutral": self.neutral_bm,
            }
            # body models are fixed; never trained with the CVAE
            for p in self.male_bm.parameters():
                p.requires_grad = False
            for p in self.female_bm.parameters():
                p.requires_grad = False
            for p in self.neutral_bm.parameters():
                p.requires_grad = False
            self.ignore_keys = ["male_bm", "female_bm", "neutral_bm"]
304
+
305
    def prepare_input(
        self,
        data_in,
        device,
        data_out=None,
        return_input_dict=False,
        return_global_dict=False,
    ):
        """
        Concatenates input and output data as expected by the model.

        Also creates a dictionary of GT outputs for use in computing the loss. And optionally
        a dictionary of inputs.

        Each stream in ``data_in``/``data_out`` is assumed to be a tensor of
        shape (B, T, ...) that reshapes to (B, T, steps_in/steps_out, -1)
        — TODO confirm against the dataset loader.

        Returns (depending on flags):
        - x_past only, or (x_past, input_dict) when data_out is None
        - (x_past, x_t, gt_dict[, input_dict][, global_gt_dict]) otherwise
        """

        #
        # input data
        #
        in_unnorm_data_list = []
        for k in self.data_names:
            cur_dat = data_in[k].to(device)
            B, T = cur_dat.size(0), cur_dat.size(1)
            cur_unnorm_dat = cur_dat.reshape((B, T, self.steps_in, -1))
            in_unnorm_data_list.append(cur_unnorm_dat)
        # concatenate all streams along the feature axis
        x_past = torch.cat(in_unnorm_data_list, axis=3)

        input_dict = None
        if return_input_dict:
            input_dict = {k: v for k, v in zip(self.data_names, in_unnorm_data_list)}

        # auxiliary inputs go into the dict only, not the network tensor
        if self.aux_in_data_names is not None:
            for k in self.aux_in_data_names:
                cur_dat = data_in[k].to(device)
                B, T = cur_dat.size(0), cur_dat.size(1)
                cur_unnorm_dat = cur_dat.reshape((B, T, self.steps_in, -1))
                input_dict[k] = cur_unnorm_dat

        #
        # output
        #
        if data_out is not None:
            out_unnorm_data_list = []
            for k in self.data_names:
                cur_dat = data_out[k].to(device)
                B, T = cur_dat.size(0), cur_dat.size(1)
                cur_unnorm_dat = cur_dat.reshape((B, T, self.steps_out, -1))
                out_unnorm_data_list.append(cur_unnorm_dat)
            x_t = torch.cat(out_unnorm_data_list, axis=3)
            gt_dict = {k: v for k, v in zip(self.data_names, out_unnorm_data_list)}

            # auxiliary outputs (e.g. contacts) are supervised but not net inputs
            if self.aux_out_data_names is not None:
                for k in self.aux_out_data_names:
                    cur_dat = data_out[k].to(device)
                    B, T = cur_dat.size(0), cur_dat.size(1)
                    cur_unnorm_dat = cur_dat.reshape((B, T, self.steps_out, -1))
                    gt_dict[k] = cur_unnorm_dat

            return_list = [x_past, x_t, gt_dict]
            if return_input_dict:
                return_list.append(input_dict)

            #
            # global
            #
            # world-frame GT, read from "global_"-prefixed keys in data_out
            if return_global_dict:
                global_gt_dict = dict()
                for k in self.data_names:
                    global_k = "global_" + k
                    cur_dat = data_out[global_k].to(device)
                    B, T = cur_dat.size(0), cur_dat.size(1)
                    # expand each to have steps_out since originally they are just B x T x ... x D
                    cur_dat = cur_dat.reshape((B, T, 1, -1)).expand_as(gt_dict[k])
                    global_gt_dict[k] = cur_dat

                if self.aux_out_data_names is not None:
                    for k in self.aux_out_data_names:
                        global_k = "global_" + k
                        cur_dat = data_out[global_k].to(device)
                        B, T = cur_dat.size(0), cur_dat.size(1)
                        # expand each to have steps_out since originally they are just B x T x ... x D
                        cur_dat = cur_dat.reshape((B, T, 1, -1)).expand_as(gt_dict[k])
                        global_gt_dict[k] = cur_dat

                return_list.append(global_gt_dict)

            return tuple(return_list)

        else:
            if return_input_dict:
                return x_past, input_dict
            else:
                return x_past
397
+
398
+ def split_output(self, decoder_out, convert_rots=True):
399
+ """
400
+ Given the output of the decoder, splits into each state component.
401
+ Also transform rotation representation to matrices.
402
+
403
+ Input:
404
+ - decoder_out (B x steps_out x D)
405
+
406
+ Returns:
407
+ - output dict
408
+ """
409
+ B = decoder_out.size(0)
410
+ decoder_out = decoder_out.reshape((B, self.steps_out, -1))
411
+
412
+ # collect outputs
413
+ name_list = self.data_names
414
+ if self.aux_out_data_names is not None:
415
+ name_list = name_list + self.aux_out_data_names
416
+ idx_list = (
417
+ self.delta_output_dim_list if self.output_delta else self.output_dim_list
418
+ )
419
+ out_dict = dict()
420
+ sidx = 0
421
+ for cur_name, cur_idx in zip(name_list, idx_list):
422
+ eidx = sidx + cur_idx
423
+ out_dict[cur_name] = decoder_out[:, :, sidx:eidx]
424
+ sidx = eidx
425
+
426
+ # transform rotations
427
+ if convert_rots and not self.output_delta: # output delta already gives rotmats
428
+ if "root_orient" in self.data_names:
429
+ out_dict["root_orient"] = convert_to_rotmat(
430
+ out_dict["root_orient"], rep=self.out_rot_rep
431
+ )
432
+ if "pose_body" in self.data_names:
433
+ out_dict["pose_body"] = convert_to_rotmat(
434
+ out_dict["pose_body"], rep=self.out_rot_rep
435
+ )
436
+
437
+ return out_dict
438
+
439
+ def forward(self, x_past, x_t):
440
+ """
441
+ single step full forward pass. This uses the posterior for sampling, not the prior.
442
+
443
+ Input:
444
+ - x_past (B x steps_in x D)
445
+ - x_t (B x steps_out x D)
446
+
447
+ Returns dict of:
448
+ - x_pred (B x steps_out x D)
449
+ - posterior_distrib (Normal(mu, sigma))
450
+ - prior_distrib (Normal(mu, sigma))
451
+ """
452
+
453
+ B, _, D = x_past.size()
454
+ past_in = x_past.reshape((B, -1))
455
+ t_in = x_t.reshape((B, -1))
456
+
457
+ x_pred_dict = self.single_step(past_in, t_in)
458
+
459
+ return x_pred_dict
460
+
461
+ def single_step(self, past_in, t_in):
462
+ """
463
+ single step that computes both prior and posterior for training. Samples from posterior
464
+ """
465
+ B = past_in.size(0)
466
+ # use past and future to encode latent transition
467
+ qm, qv = self.posterior(past_in, t_in)
468
+
469
+ # prior
470
+ pm, pv = None, None
471
+ if self.use_conditional_prior:
472
+ # predict prior based on past
473
+ pm, pv = self.prior(past_in)
474
+ else:
475
+ # use standard normal
476
+ pm, pv = torch.zeros_like(qm), torch.ones_like(qv)
477
+
478
+ # sample from posterior using reparam trick
479
+ z = self.rsample(qm, qv)
480
+
481
+ # decode to get next step
482
+ decoder_out = self.decode(z, past_in)
483
+ decoder_out = decoder_out.reshape(
484
+ (B, self.steps_out, -1)
485
+ ) # B x steps_out x D_out
486
+
487
+ # split output predictions and transform out rotations to matrices
488
+ x_pred_dict = self.split_output(decoder_out)
489
+
490
+ x_pred_dict["posterior_distrib"] = (qm, qv)
491
+ x_pred_dict["prior_distrib"] = (pm, pv)
492
+
493
+ return x_pred_dict
494
+
495
+ def prior(self, past_in):
496
+ """
497
+ Encodes the posterior distribution using the past and future states.
498
+
499
+ Input:
500
+ - past_in (B x steps_in*D)
501
+ """
502
+ prior_out = self.prior_net(past_in)
503
+ mean = prior_out[:, : self.latent_size]
504
+ logvar = prior_out[:, self.latent_size :]
505
+ var = torch.exp(logvar)
506
+ return mean, var
507
+
508
+ def posterior(self, past_in, t_in):
509
+ """
510
+ Encodes the posterior distribution using the past and future states.
511
+
512
+ Input:
513
+ - past_in (B x steps_in*D)
514
+ - t_in (B x steps_out*D)
515
+ """
516
+ encoder_in = torch.cat([past_in, t_in], axis=1)
517
+
518
+ encoder_out = self.encoder(encoder_in)
519
+ mean = encoder_out[:, : self.latent_size]
520
+ logvar = encoder_out[:, self.latent_size :]
521
+ var = torch.exp(logvar)
522
+
523
+ return mean, var
524
+
525
+ def rsample(self, mu, var):
526
+ """
527
+ Return gaussian sample of (mu, var) using reparameterization trick.
528
+ """
529
+ eps = torch.randn_like(mu)
530
+ z = mu + eps * torch.sqrt(var)
531
+ return z
532
+
533
    def decode(self, z, past_in):
        """
        Decodes prediction from the latent transition and past states

        Input:
        - z (B x latent_size)
        - past_in (B x steps_in*D)

        Returns:
        - decoder_out (B x steps_out*D)
        """
        B = z.size(0)
        decoder_in = torch.cat([past_in, z], axis=1)
        decoder_out = self.decoder(decoder_in).reshape((B, 1, -1))

        if self.output_delta:
            # network output is the residual, add to the input to get final output
            step_in = past_in.reshape((B, self.steps_in, -1))[
                :, -1:, :
            ]  # most recent input step

            final_out_list = []
            # walk the input and output feature axes in lockstep; widths can
            # differ per stream because rotation representations may differ
            in_sidx = out_sidx = 0
            decode_out_dim_list = self.output_dim_list
            if self.pred_contacts:
                decode_out_dim_list = decode_out_dim_list[:-1]  # do contacts separately
            for in_dim_idx, out_dim_idx, data_name in zip(
                self.input_dim_list, decode_out_dim_list, self.data_names
            ):
                in_eidx = in_sidx + in_dim_idx
                out_eidx = out_sidx + out_dim_idx

                # add residual to input (and transform as necessary for rotations)
                in_val = step_in[:, :, in_sidx:in_eidx]
                out_val = decoder_out[:, :, out_sidx:out_eidx]
                if data_name in ["root_orient", "pose_body"]:
                    # rotations compose multiplicatively: convert both to
                    # matrices and left-multiply by the predicted residual
                    if self.in_rot_rep != "mat":
                        in_val = convert_to_rotmat(in_val, rep=self.in_rot_rep)
                    out_val = convert_to_rotmat(out_val, rep=self.out_rot_rep)

                    in_val = in_val.reshape((B, 1, -1, 3, 3))
                    out_val = out_val.reshape((B, self.steps_out, -1, 3, 3))

                    rot_in = torch.matmul(out_val, in_val).reshape(
                        (B, self.steps_out, -1)
                    )  # rotate by predicted residual
                    final_out_list.append(rot_in)
                else:
                    # non-rotation streams are plain additive residuals
                    final_out_list.append(out_val + in_val)

                in_sidx = in_eidx
                out_sidx = out_eidx
            if self.pred_contacts:
                # contacts are a direct classification output, not a residual
                final_out_list.append(decoder_out[:, :, out_sidx:])

            decoder_out = torch.cat(final_out_list, dim=2)

        decoder_out = decoder_out.reshape((B, -1))

        return decoder_out
593
+
594
+ def scheduled_sampling(
595
+ self,
596
+ x_past,
597
+ x_t,
598
+ init_input_dict,
599
+ p=0.5,
600
+ gender=None,
601
+ betas=None,
602
+ need_global_out=True,
603
+ ):
604
+ """
605
+ Given all inputs and ground truth outputs for all steps, roll out model predictions
606
+ where at each step use the GT input with prob p, otherwise use own previous output.
607
+
608
+ Input:
609
+ - x_past (B x T x steps_in x D)
610
+ - x_t (B x T x steps_out x D)
611
+ - init_input_dict : dictionary of each initial state (B x steps_in x D), rotations should be matrices
612
+ - p : probability of using the GT input at each step of the sequence
613
+ - gender/betas only required if self.use_smpl_joint_inputs is true (used to decide the SMPL body model)
614
+ """
615
+ B, T, S, D = x_past.size()
616
+ S_out = x_t.size(2)
617
+ J = len(SMPL_JOINTS)
618
+ cur_input_dict = init_input_dict # this is the predicted input dict
619
+
620
+ # initial input must be from GT since we don't have any predictions yet
621
+ past_in = x_past[:, 0, :, :].reshape((B, -1))
622
+ t_in = x_t[:, 0, :, :].reshape((B, -1))
623
+
624
+ global_world2local_rot = (
625
+ torch.eye(3).reshape((1, 1, 3, 3)).expand((B, 1, 3, 3)).to(x_past)
626
+ )
627
+ global_world2local_trans = torch.zeros((B, 1, 3)).to(x_past)
628
+ trans2joint = torch.zeros((B, 1, 1, 3)).to(x_past)
629
+ if self.need_trans2joint:
630
+ trans2joint = -torch.cat(
631
+ [cur_input_dict["joints"][:, -1, :2], torch.zeros((B, 1)).to(x_past)],
632
+ axis=1,
633
+ ).reshape(
634
+ (B, 1, 1, 3)
635
+ ) # same for whole sequence
636
+ pred_local_seq = []
637
+ pred_global_seq = []
638
+ for t in range(T):
639
+ # sample next step from model
640
+ x_pred_dict = self.single_step(past_in, t_in)
641
+
642
+ # save output
643
+ pred_local_seq.append(x_pred_dict)
644
+
645
+ # output is the actual regressed joints, but input to next step can use smpl joints
646
+ x_pred_smpl_joints = None
647
+ if self.use_smpl_joint_inputs and gender is not None and betas is not None:
648
+ # this assumes the model is actually outputting everything we need to run SMPL
649
+ # also assumes single output step
650
+ smpl_trans = x_pred_dict["trans"][:, 0:1].reshape(
651
+ (B, 3)
652
+ ) # only want immediate next frame
653
+ smpl_root_orient = rotation_matrix_to_angle_axis(
654
+ x_pred_dict["root_orient"][:, 0:1].reshape((B, 3, 3))
655
+ ).reshape((B, 3))
656
+ smpl_betas = betas[:, 0, :]
657
+ smpl_pose_body = rotation_matrix_to_angle_axis(
658
+ x_pred_dict["pose_body"][:, 0:1].reshape((B * (J - 1), 3, 3))
659
+ ).reshape((B, (J - 1) * 3))
660
+
661
+ smpl_vals = [smpl_trans, smpl_root_orient, smpl_betas, smpl_pose_body]
662
+ # batch may be a mix of genders, so need to carefully use the corresponding SMPL body model
663
+ gender_names = ["male", "female", "neutral"]
664
+ pred_joints = []
665
+ prev_nbidx = 0
666
+ cat_idx_map = np.ones((B), dtype=np.int) * -1
667
+ for gender_name in gender_names:
668
+ gender_idx = np.array(gender) == gender_name
669
+ nbidx = np.sum(gender_idx)
670
+
671
+ cat_idx_map[gender_idx] = np.arange(
672
+ prev_nbidx, prev_nbidx + nbidx, dtype=np.int
673
+ )
674
+ prev_nbidx += nbidx
675
+
676
+ gender_smpl_vals = [val[gender_idx] for val in smpl_vals]
677
+
678
+ # need to pad extra frames with zeros in case not as long as expected
679
+ pad_size = self.smpl_batch_size - nbidx
680
+ if pad_size == B:
681
+ # skip if no frames for this gender
682
+ continue
683
+ pad_list = gender_smpl_vals
684
+ if pad_size < 0:
685
+ raise Exception(
686
+ "SMPL model batch size not large enough to accomodate!"
687
+ )
688
+ elif pad_size > 0:
689
+ pad_list = self.zero_pad_tensors(pad_list, pad_size)
690
+
691
+ # reconstruct SMPL
692
+ cur_pred_trans, cur_pred_orient, cur_betas, cur_pred_pose = pad_list
693
+ bm = self.bm_dict[gender_name]
694
+ pred_body = bm(
695
+ pose_body=cur_pred_pose,
696
+ betas=cur_betas,
697
+ root_orient=cur_pred_orient,
698
+ trans=cur_pred_trans,
699
+ )
700
+ if pad_size > 0:
701
+ pred_joints.append(pred_body.Jtr[:-pad_size])
702
+ else:
703
+ pred_joints.append(pred_body.Jtr)
704
+
705
+ # cat all genders and reorder to original batch ordering
706
+ x_pred_smpl_joints = torch.cat(pred_joints, axis=0)[
707
+ :, : len(SMPL_JOINTS), :
708
+ ].reshape((B, 1, -1))
709
+ x_pred_smpl_joints = x_pred_smpl_joints[cat_idx_map]
710
+
711
+ # prepare predicted input to next step in case needed
712
+ # update input dict with new frame
713
+ del_keys = []
714
+ for k in cur_input_dict.keys():
715
+ if k in x_pred_dict:
716
+ # drop oldest frame and add new prediction
717
+ keep_frames = cur_input_dict[k][:, 1:, :]
718
+ # print(keep_frames.size())
719
+ if (
720
+ k == "joints"
721
+ and self.use_smpl_joint_inputs
722
+ and x_pred_smpl_joints is not None
723
+ ):
724
+ # print('Using SMPL joints rather than regressed joints...')
725
+ if self.detach_sched_samp:
726
+ cur_input_dict[k] = torch.cat(
727
+ [keep_frames, x_pred_smpl_joints.detach()], axis=1
728
+ )
729
+ else:
730
+ cur_input_dict[k] = torch.cat(
731
+ [keep_frames, x_pred_smpl_joints], axis=1
732
+ )
733
+ else:
734
+ if self.detach_sched_samp:
735
+ cur_input_dict[k] = torch.cat(
736
+ [keep_frames, x_pred_dict[k][:, 0:1, :].detach()],
737
+ axis=1,
738
+ )
739
+ else:
740
+ cur_input_dict[k] = torch.cat(
741
+ [keep_frames, x_pred_dict[k][:, 0:1, :]], axis=1
742
+ )
743
+ # print(cur_input_dict[k].size())
744
+ else:
745
+ del_keys.append(k)
746
+ for k in del_keys:
747
+ del cur_input_dict[k] # don't need it anymore
748
+
749
+ # get world2aligned rot and translation
750
+ if self.detach_sched_samp:
751
+ root_orient_mat = (
752
+ x_pred_dict["root_orient"][:, 0, :].reshape((B, 3, 3)).detach()
753
+ )
754
+ world2aligned_rot = compute_world2aligned_mat(root_orient_mat)
755
+ world2aligned_trans = torch.cat(
756
+ [
757
+ -x_pred_dict["trans"][:, 0, :2].detach(),
758
+ torch.zeros((B, 1)).to(x_past),
759
+ ],
760
+ axis=1,
761
+ )
762
+ else:
763
+ root_orient_mat = x_pred_dict["root_orient"][:, 0, :].reshape((B, 3, 3))
764
+ world2aligned_rot = compute_world2aligned_mat(root_orient_mat)
765
+ world2aligned_trans = torch.cat(
766
+ [-x_pred_dict["trans"][:, 0, :2], torch.zeros((B, 1)).to(x_past)],
767
+ axis=1,
768
+ )
769
+
770
+ #
771
+ # transform inputs to this local frame for next step
772
+ #
773
+ cur_input_dict = self.apply_world2local_trans(
774
+ world2aligned_trans,
775
+ world2aligned_rot,
776
+ trans2joint,
777
+ cur_input_dict,
778
+ cur_input_dict,
779
+ invert=False,
780
+ )
781
+
782
+ # convert rots to correct input format
783
+ if self.in_rot_rep == "aa":
784
+ if "root_orient" in self.data_names:
785
+ cur_input_dict["root_orient"] = rotation_matrix_to_angle_axis(
786
+ cur_input_dict["root_orient"].reshape((B * S, 3, 3))
787
+ ).reshape((B, S, 3))
788
+ if "pose_body" in self.data_names:
789
+ cur_input_dict["pose_body"] = rotation_matrix_to_angle_axis(
790
+ cur_input_dict["pose_body"].reshape((B * S * (J - 1), 3, 3))
791
+ ).reshape((B, S, (J - 1) * 3))
792
+ elif self.in_rot_rep == "6d":
793
+ if "root_orient" in self.data_names:
794
+ cur_input_dict["root_orient"] = cur_input_dict["root_orient"][
795
+ :, :, :6
796
+ ]
797
+ if "pose_body" in self.data_names:
798
+ cur_input_dict["pose_body"] = (
799
+ cur_input_dict["pose_body"]
800
+ .reshape((B, S, J - 1, 9))[:, :, :, :6]
801
+ .reshape((B, S, (J - 1) * 6))
802
+ )
803
+
804
+ if need_global_out:
805
+ #
806
+ # compute current world output and update world2local transform
807
+ #
808
+ cur_world_dict = dict()
809
+ cur_world_dict = self.apply_world2local_trans(
810
+ global_world2local_trans,
811
+ global_world2local_rot,
812
+ trans2joint,
813
+ x_pred_dict,
814
+ cur_world_dict,
815
+ invert=True,
816
+ )
817
+
818
+ if self.detach_sched_samp:
819
+ global_world2local_trans = torch.cat(
820
+ [
821
+ -cur_world_dict["trans"][:, 0:1, :2].detach(),
822
+ torch.zeros((B, 1, 1)).to(x_past),
823
+ ],
824
+ axis=2,
825
+ )
826
+ else:
827
+ global_world2local_trans = torch.cat(
828
+ [
829
+ -cur_world_dict["trans"][:, 0:1, :2],
830
+ torch.zeros((B, 1, 1)).to(x_past),
831
+ ],
832
+ axis=2,
833
+ )
834
+
835
+ global_world2local_rot = torch.matmul(
836
+ global_world2local_rot, world2aligned_rot.reshape((B, 1, 3, 3))
837
+ )
838
+
839
+ pred_global_seq.append(cur_world_dict)
840
+
841
+ if t + 1 < T:
842
+ # choose whether next step will use GT or predicted inputs and prepare them
843
+ if np.random.random_sample() < p:
844
+ # use GT
845
+ past_in = x_past[:, t + 1, :, :].reshape((B, -1))
846
+ else:
847
+ # cat all inputs together to form past_in
848
+ in_data_list = []
849
+ for k in self.data_names:
850
+ in_data_list.append(cur_input_dict[k])
851
+ past_in = torch.cat(in_data_list, axis=2)
852
+ past_in = past_in.reshape((B, -1))
853
+
854
+ # GT output is the same no matter what
855
+ t_in = x_t[:, t + 1, :, :].reshape((B, -1))
856
+
857
+ if need_global_out:
858
+ # aggregate pred_seq
859
+ pred_global_seq_out = dict()
860
+ for k in pred_global_seq[0].keys():
861
+ if k == "posterior_distrib" or k == "prior_distrib":
862
+ m = torch.stack(
863
+ [pred_global_seq[i][k][0] for i in range(len(pred_global_seq))],
864
+ axis=1,
865
+ )
866
+ v = torch.stack(
867
+ [pred_global_seq[i][k][1] for i in range(len(pred_global_seq))],
868
+ axis=1,
869
+ )
870
+ pred_global_seq_out[k] = (m, v)
871
+ else:
872
+ pred_global_seq_out[k] = torch.stack(
873
+ [pred_global_seq[i][k] for i in range(len(pred_global_seq))],
874
+ axis=1,
875
+ )
876
+
877
+ # aggregate pred_seq
878
+ pred_local_seq_out = dict()
879
+ for k in pred_local_seq[0].keys():
880
+ # print(k)
881
+ if k == "posterior_distrib" or k == "prior_distrib":
882
+ m = torch.stack(
883
+ [pred_local_seq[i][k][0] for i in range(len(pred_local_seq))],
884
+ axis=1,
885
+ )
886
+ v = torch.stack(
887
+ [pred_local_seq[i][k][1] for i in range(len(pred_local_seq))],
888
+ axis=1,
889
+ )
890
+ pred_local_seq_out[k] = (m, v)
891
+ else:
892
+ pred_local_seq_out[k] = torch.stack(
893
+ [pred_local_seq[i][k] for i in range(len(pred_local_seq))], axis=1
894
+ )
895
+
896
+ if need_global_out:
897
+ return pred_global_seq_out, pred_local_seq_out
898
+ else:
899
+ return pred_local_seq_out
900
+
901
+ def apply_world2local_trans(
902
+ self,
903
+ world2local_trans,
904
+ world2local_rot,
905
+ trans2joint,
906
+ input_dict,
907
+ output_dict,
908
+ invert=False,
909
+ ):
910
+ """
911
+ Applies the given world2local transformation to the data in input_dict and stores the result in output_dict.
912
+
913
+ If invert is true, applies local2world.
914
+
915
+ - world2local_trans : B x 3 or B x 1 x 3
916
+ - world2local_rot : B x 3 x 3 or B x 1 x 3 x 3
917
+ - trans2joint : B x 1 x 1 x 3
918
+ """
919
+ B = world2local_trans.size(0)
920
+ world2local_rot = world2local_rot.reshape((B, 1, 3, 3))
921
+ world2local_trans = world2local_trans.reshape((B, 1, 3))
922
+ trans2joint = trans2joint.reshape((B, 1, 1, 3))
923
+ if invert:
924
+ local2world_rot = world2local_rot.transpose(3, 2)
925
+ for k, v in input_dict.items():
926
+ # apply differently depending on which data value it is
927
+ if k not in WORLD2ALIGN_NAME_CACHE:
928
+ # frame of reference is irrelevant, just copy to output
929
+ output_dict[k] = input_dict[k]
930
+ continue
931
+
932
+ S = input_dict[k].size(1)
933
+ if k in ["root_orient"]:
934
+ # rot: B x S x 3 x 3 sized rotation matrix input
935
+ input_mat = input_dict[k].reshape(
936
+ (B, S, 3, 3)
937
+ ) # make sure not B x S x 9
938
+ if invert:
939
+ output_dict[k] = torch.matmul(local2world_rot, input_mat).reshape(
940
+ (B, S, 9)
941
+ )
942
+ else:
943
+ output_dict[k] = torch.matmul(world2local_rot, input_mat).reshape(
944
+ (B, S, 9)
945
+ )
946
+ elif k in ["trans"]:
947
+ # trans + rot : B x S x 3
948
+ input_trans = input_dict[k]
949
+ if invert:
950
+ output_trans = torch.matmul(
951
+ local2world_rot, input_trans.reshape((B, S, 3, 1))
952
+ )[:, :, :, 0]
953
+ output_trans = output_trans - world2local_trans
954
+ output_dict[k] = output_trans
955
+ else:
956
+ input_trans = input_trans + world2local_trans
957
+ output_dict[k] = torch.matmul(
958
+ world2local_rot, input_trans.reshape((B, S, 3, 1))
959
+ )[:, :, :, 0]
960
+ elif k in ["joints", "verts"]:
961
+ # trans + joint + rot : B x S x J x 3
962
+ J = input_dict[k].size(2) // 3
963
+ input_pts = input_dict[k].reshape((B, S, J, 3))
964
+ if invert:
965
+ input_pts = input_pts + trans2joint
966
+ output_pts = torch.matmul(
967
+ local2world_rot.reshape((B, 1, 1, 3, 3)),
968
+ input_pts.reshape((B, S, J, 3, 1)),
969
+ )[:, :, :, :, 0]
970
+ output_pts = (
971
+ output_pts
972
+ - trans2joint
973
+ - world2local_trans.reshape((B, 1, 1, 3))
974
+ )
975
+ output_dict[k] = output_pts.reshape((B, S, J * 3))
976
+ else:
977
+ input_pts = (
978
+ input_pts
979
+ + world2local_trans.reshape((B, 1, 1, 3))
980
+ + trans2joint
981
+ )
982
+ output_pts = torch.matmul(
983
+ world2local_rot.reshape((B, 1, 1, 3, 3)),
984
+ input_pts.reshape((B, S, J, 3, 1)),
985
+ )[:, :, :, :, 0]
986
+ output_pts = output_pts - trans2joint
987
+ output_dict[k] = output_pts.reshape((B, S, J * 3))
988
+ elif k in ["joints_vel", "verts_vel"]:
989
+ # rot : B x S x J x 3
990
+ J = input_dict[k].size(2) // 3
991
+ input_pts = input_dict[k].reshape((B, S, J, 3, 1))
992
+ if invert:
993
+ outuput_pts = torch.matmul(
994
+ local2world_rot.reshape((B, 1, 1, 3, 3)), input_pts
995
+ )[:, :, :, :, 0]
996
+ output_dict[k] = outuput_pts.reshape((B, S, J * 3))
997
+ else:
998
+ output_pts = torch.matmul(
999
+ world2local_rot.reshape((B, 1, 1, 3, 3)), input_pts
1000
+ )[:, :, :, :, 0]
1001
+ output_dict[k] = output_pts.reshape((B, S, J * 3))
1002
+ elif k in ["trans_vel", "root_orient_vel"]:
1003
+ # rot : B x S x 3
1004
+ input_pts = input_dict[k].reshape((B, S, 3, 1))
1005
+ if invert:
1006
+ output_dict[k] = torch.matmul(local2world_rot, input_pts)[
1007
+ :, :, :, 0
1008
+ ]
1009
+ else:
1010
+ output_dict[k] = torch.matmul(world2local_rot, input_pts)[
1011
+ :, :, :, 0
1012
+ ]
1013
+ else:
1014
+ print(
1015
+ "Received an unexpected key when transforming world2local: %s!"
1016
+ % (k)
1017
+ )
1018
+ exit()
1019
+
1020
+ return output_dict
1021
+
1022
+ def zero_pad_tensors(self, pad_list, pad_size):
1023
+ """
1024
+ Assumes tensors in pad_list are B x D
1025
+ """
1026
+ new_pad_list = []
1027
+ for pad_idx, pad_tensor in enumerate(pad_list):
1028
+ padding = torch.zeros((pad_size, pad_tensor.size(1))).to(pad_tensor)
1029
+ new_pad_list.append(torch.cat([pad_tensor, padding], dim=0))
1030
+ return new_pad_list
1031
+
1032
+ def roll_out(
1033
+ self,
1034
+ x_past,
1035
+ init_input_dict,
1036
+ num_steps,
1037
+ use_mean=False,
1038
+ z_seq=None,
1039
+ return_prior=False,
1040
+ gender=None,
1041
+ betas=None,
1042
+ return_z=False,
1043
+ canonicalize_input=False,
1044
+ uncanonicalize_output=False,
1045
+ ):
1046
+ """
1047
+ Given input for first step, roll out using own output the entire time by sampling from the prior.
1048
+ Returns the global trajectory.
1049
+
1050
+ Input:
1051
+ - x_past (B x steps_in x D_in)
1052
+ - initial_input_dict : dictionary of each initial state (B x steps_in x D), rotations should be matrices
1053
+ (assumes initial state is already in its local coordinate system (translation at [0,0,z] and aligned))
1054
+ - num_steps : the number of timesteps to roll out
1055
+ - use_mean : if True, uses the mean of latent distribution instead of sampling
1056
+ - z_seq : (B x steps_out x D) if given, uses as the latent input to decoder at each step rather than sampling
1057
+ - return_prior : if True, also returns the output of the conditional prior at each step
1058
+ -gender : list of e.g. ['male', 'female', etc..] of length B
1059
+ -betas : B x steps_in x D
1060
+ -return_z : returns the sampled z sequence in addition to the output
1061
+ - canonicalize_input : if true, the input initial state is assumed to not be in the local aligned coordinate system. It will be transformed before using.
1062
+ - uncanonicalize_output : if true and canonicalize_input=True, will transform output back into the input frame rather than return in canonical frame.
1063
+ Returns:
1064
+ - x_pred - dict of (B x num_steps x D_out) for each value. Rotations are all matrices.
1065
+ """
1066
+ J = len(SMPL_JOINTS)
1067
+ cur_input_dict = init_input_dict
1068
+
1069
+ # need to transform init input to local frame
1070
+ world2aligned_rot = world2aligned_trans = None
1071
+ if canonicalize_input:
1072
+ B, _, _ = cur_input_dict[list(cur_input_dict.keys())[0]].size()
1073
+ # must transform initial input into the local frame
1074
+ # get world2aligned rot and translation
1075
+ root_orient_mat = cur_input_dict["root_orient"]
1076
+ pose_body_mat = cur_input_dict["pose_body"]
1077
+ if "root_orient" in self.data_names and self.in_rot_rep != "mat":
1078
+ root_orient_mat = convert_to_rotmat(
1079
+ root_orient_mat, rep=self.in_rot_rep
1080
+ )
1081
+ if "pose_body" in self.data_names and self.in_rot_rep != "mat":
1082
+ pose_body_mat = convert_to_rotmat(pose_body_mat, rep=self.in_rot_rep)
1083
+
1084
+ root_orient_mat = root_orient_mat[:, -1].reshape((B, 3, 3))
1085
+ world2aligned_rot = compute_world2aligned_mat(root_orient_mat)
1086
+ world2aligned_trans = torch.cat(
1087
+ [
1088
+ -cur_input_dict["trans"][:, -1, :2],
1089
+ torch.zeros((B, 1)).to(root_orient_mat),
1090
+ ],
1091
+ axis=1,
1092
+ )
1093
+
1094
+ # compute trans2joint
1095
+ if self.need_trans2joint:
1096
+ trans2joint = -(
1097
+ cur_input_dict["joints"][:, -1, :2] + world2aligned_trans[:, :2]
1098
+ )
1099
+ trans2joint = torch.cat(
1100
+ [trans2joint, torch.zeros((B, 1)).to(trans2joint)], axis=1
1101
+ ).reshape((B, 1, 1, 3))
1102
+
1103
+ # transform to local frame
1104
+ cur_input_dict = self.apply_world2local_trans(
1105
+ world2aligned_trans,
1106
+ world2aligned_rot,
1107
+ trans2joint,
1108
+ cur_input_dict,
1109
+ cur_input_dict,
1110
+ invert=False,
1111
+ )
1112
+
1113
+ # check to make sure we have enough input steps, if not, pad
1114
+ pad_x_past = x_past is not None and x_past.size(1) < self.steps_in
1115
+ pad_in_dict = (
1116
+ cur_input_dict[list(cur_input_dict.keys())[0]].size(1) < self.steps_in
1117
+ )
1118
+ if pad_x_past:
1119
+ num_pad_steps = self.steps_in - x_past.size(1)
1120
+ cur_padding = torch.zeros(
1121
+ (x_past.size(0), num_pad_steps, x_past.size(2))
1122
+ ).to(
1123
+ x_past
1124
+ ) # assuming all data is B x T x D
1125
+ x_past = torch.cat([cur_padding, x_past], axis=1)
1126
+ if pad_in_dict:
1127
+ for k in cur_input_dict.keys():
1128
+ cur_in_dat = cur_input_dict[k]
1129
+ num_pad_steps = self.steps_in - cur_in_dat.size(1)
1130
+ cur_padding = torch.zeros(
1131
+ (cur_in_dat.size(0), num_pad_steps, cur_in_dat.size(2))
1132
+ ).to(
1133
+ cur_in_dat
1134
+ ) # assuming all data is B x T x D
1135
+ padded_in_dat = torch.cat([cur_padding, cur_in_dat], axis=1)
1136
+ cur_input_dict[k] = padded_in_dat
1137
+
1138
+ if x_past is None or canonicalize_input:
1139
+ x_past = [cur_input_dict[k] for k in self.data_names]
1140
+ x_past = torch.cat(x_past, axis=2)
1141
+ B, S, D = x_past.size()
1142
+ past_in = x_past.reshape((B, -1))
1143
+
1144
+ global_world2local_rot = (
1145
+ torch.eye(3).reshape((1, 1, 3, 3)).expand((B, 1, 3, 3)).to(x_past)
1146
+ )
1147
+ global_world2local_trans = torch.zeros((B, 1, 3)).to(x_past)
1148
+ if canonicalize_input and uncanonicalize_output:
1149
+ global_world2local_rot = world2aligned_rot.unsqueeze(1)
1150
+ global_world2local_trans = world2aligned_trans.unsqueeze(1)
1151
+ trans2joint = torch.zeros((B, 1, 1, 3)).to(x_past)
1152
+ if self.need_trans2joint:
1153
+ trans2joint = -torch.cat(
1154
+ [cur_input_dict["joints"][:, -1, :2], torch.zeros((B, 1)).to(x_past)],
1155
+ axis=1,
1156
+ ).reshape(
1157
+ (B, 1, 1, 3)
1158
+ ) # same for whole sequence
1159
+ pred_local_seq = []
1160
+ pred_global_seq = []
1161
+ prior_seq = []
1162
+ z_out_seq = []
1163
+ for t in range(num_steps):
1164
+ x_pred_dict = None
1165
+ # sample next step
1166
+ z_in = None
1167
+ if z_seq is not None:
1168
+ z_in = z_seq[:, t]
1169
+ sample_out = self.sample_step(
1170
+ past_in,
1171
+ use_mean=use_mean,
1172
+ z=z_in,
1173
+ return_prior=return_prior,
1174
+ return_z=return_z,
1175
+ )
1176
+ if return_prior:
1177
+ prior_out = sample_out["prior"]
1178
+ prior_seq.append(prior_out)
1179
+ if return_z:
1180
+ z_out = sample_out["z"]
1181
+ z_out_seq.append(z_out)
1182
+ decoder_out = sample_out["decoder_out"]
1183
+
1184
+ # split output predictions and transform out rotations to matrices
1185
+ x_pred_dict = self.split_output(decoder_out, convert_rots=True)
1186
+ if self.steps_out > 1:
1187
+ for k in x_pred_dict.keys():
1188
+ # only want immediate next frame prediction
1189
+ x_pred_dict[k] = x_pred_dict[k][:, 0:1, :]
1190
+
1191
+ pred_local_seq.append(x_pred_dict)
1192
+
1193
+ # output is the actual regressed joints, but input to next step can use smpl joints
1194
+ x_pred_smpl_joints = None
1195
+ if self.use_smpl_joint_inputs and gender is not None and betas is not None:
1196
+ # this assumes the model is actually outputting everything we need to run SMPL
1197
+ # also assumes single output step
1198
+ smpl_trans = x_pred_dict["trans"].reshape((B, 3))
1199
+ smpl_root_orient = rotation_matrix_to_angle_axis(
1200
+ x_pred_dict["root_orient"].reshape((B, 3, 3))
1201
+ ).reshape((B, 3))
1202
+ smpl_betas = betas[:, 0, :]
1203
+ smpl_pose_body = rotation_matrix_to_angle_axis(
1204
+ x_pred_dict["pose_body"].reshape((B * (J - 1), 3, 3))
1205
+ ).reshape((B, (J - 1) * 3))
1206
+
1207
+ smpl_vals = [smpl_trans, smpl_root_orient, smpl_betas, smpl_pose_body]
1208
+ # each batch index may be a different gender
1209
+ gender_names = ["male", "female", "neutral"]
1210
+ pred_joints = []
1211
+ prev_nbidx = 0
1212
+ cat_idx_map = np.ones((B), dtype=np.int) * -1
1213
+ for gender_name in gender_names:
1214
+ gender_idx = np.array(gender) == gender_name
1215
+ nbidx = np.sum(gender_idx)
1216
+ cat_idx_map[gender_idx] = np.arange(
1217
+ prev_nbidx, prev_nbidx + nbidx, dtype=np.int
1218
+ )
1219
+ prev_nbidx += nbidx
1220
+
1221
+ gender_smpl_vals = [val[gender_idx] for val in smpl_vals]
1222
+
1223
+ # need to pad extra frames with zeros in case not as long as expected
1224
+ pad_size = self.smpl_batch_size - nbidx
1225
+ if pad_size == B:
1226
+ # skip if no frames for this gender
1227
+ continue
1228
+ pad_list = gender_smpl_vals
1229
+ if pad_size < 0:
1230
+ raise Exception(
1231
+ "SMPL model batch size not large enough to accomodate!"
1232
+ )
1233
+ elif pad_size > 0:
1234
+ pad_list = self.zero_pad_tensors(pad_list, pad_size)
1235
+
1236
+ # reconstruct SMPL
1237
+ cur_pred_trans, cur_pred_orient, cur_betas, cur_pred_pose = pad_list
1238
+ bm = self.bm_dict[gender_name]
1239
+ pred_body = bm(
1240
+ pose_body=cur_pred_pose,
1241
+ betas=cur_betas,
1242
+ root_orient=cur_pred_orient,
1243
+ trans=cur_pred_trans,
1244
+ )
1245
+ if pad_size > 0:
1246
+ pred_joints.append(pred_body.Jtr[:-pad_size])
1247
+ else:
1248
+ pred_joints.append(pred_body.Jtr)
1249
+
1250
+ # cat all genders and reorder to original batch ordering
1251
+ x_pred_smpl_joints = torch.cat(pred_joints, axis=0)[
1252
+ :, : len(SMPL_JOINTS), :
1253
+ ].reshape((B, 1, -1))
1254
+ x_pred_smpl_joints = x_pred_smpl_joints[cat_idx_map]
1255
+
1256
+ # prepare input to next step
1257
+ # update input dict with new frame
1258
+ del_keys = []
1259
+ for k in cur_input_dict.keys():
1260
+ if k in x_pred_dict:
1261
+ # drop oldest frame and add new prediction
1262
+ keep_frames = cur_input_dict[k][:, 1:, :]
1263
+ # print(keep_frames.size())
1264
+
1265
+ if (
1266
+ k == "joints"
1267
+ and self.use_smpl_joint_inputs
1268
+ and x_pred_smpl_joints is not None
1269
+ ):
1270
+ cur_input_dict[k] = torch.cat(
1271
+ [keep_frames, x_pred_smpl_joints], axis=1
1272
+ )
1273
+ else:
1274
+ cur_input_dict[k] = torch.cat(
1275
+ [keep_frames, x_pred_dict[k]], axis=1
1276
+ )
1277
+ else:
1278
+ del_keys.append(k)
1279
+ for k in del_keys:
1280
+ del cur_input_dict[k]
1281
+
1282
+ # get world2aligned rot and translation
1283
+ root_orient_mat = x_pred_dict["root_orient"][:, 0, :].reshape((B, 3, 3))
1284
+ world2aligned_rot = compute_world2aligned_mat(root_orient_mat)
1285
+ world2aligned_trans = torch.cat(
1286
+ [-x_pred_dict["trans"][:, 0, :2], torch.zeros((B, 1)).to(x_past)],
1287
+ axis=1,
1288
+ )
1289
+
1290
+ #
1291
+ # transform inputs to this local frame (body pose is not affected) for next step
1292
+ #
1293
+ cur_input_dict = self.apply_world2local_trans(
1294
+ world2aligned_trans,
1295
+ world2aligned_rot,
1296
+ trans2joint,
1297
+ cur_input_dict,
1298
+ cur_input_dict,
1299
+ invert=False,
1300
+ )
1301
+
1302
+ # convert rots to correct input format
1303
+ if self.in_rot_rep == "aa":
1304
+ if "root_orient" in self.data_names:
1305
+ cur_input_dict["root_orient"] = rotation_matrix_to_angle_axis(
1306
+ cur_input_dict["root_orient"].reshape((B * S, 3, 3))
1307
+ ).reshape((B, S, 3))
1308
+ if "pose_body" in self.data_names:
1309
+ cur_input_dict["pose_body"] = rotation_matrix_to_angle_axis(
1310
+ cur_input_dict["pose_body"].reshape((B * S * (J - 1), 3, 3))
1311
+ ).reshape((B, S, (J - 1) * 3))
1312
+ elif self.in_rot_rep == "6d":
1313
+ if "root_orient" in self.data_names:
1314
+ cur_input_dict["root_orient"] = cur_input_dict["root_orient"][
1315
+ :, :, :6
1316
+ ]
1317
+ if "pose_body" in self.data_names:
1318
+ cur_input_dict["pose_body"] = (
1319
+ cur_input_dict["pose_body"]
1320
+ .reshape((B, S, J - 1, 9))[:, :, :, :6]
1321
+ .reshape((B, S, (J - 1) * 6))
1322
+ )
1323
+
1324
+ #
1325
+ # compute current world output and update world2local transform
1326
+ #
1327
+ cur_world_dict = dict()
1328
+ cur_world_dict = self.apply_world2local_trans(
1329
+ global_world2local_trans,
1330
+ global_world2local_rot,
1331
+ trans2joint,
1332
+ x_pred_dict,
1333
+ cur_world_dict,
1334
+ invert=True,
1335
+ )
1336
+ #
1337
+ # update world2local transform
1338
+ #
1339
+ global_world2local_trans = torch.cat(
1340
+ [
1341
+ -cur_world_dict["trans"][:, 0:1, :2],
1342
+ torch.zeros((B, 1, 1)).to(x_past),
1343
+ ],
1344
+ axis=2,
1345
+ )
1346
+ # print(world2aligned_rot)
1347
+ global_world2local_rot = torch.matmul(
1348
+ global_world2local_rot, world2aligned_rot.reshape((B, 1, 3, 3))
1349
+ )
1350
+
1351
+ pred_global_seq.append(cur_world_dict)
1352
+
1353
+ # cat all inputs together to form past_in
1354
+ in_data_list = []
1355
+ for k in self.data_names:
1356
+ in_data_list.append(cur_input_dict[k])
1357
+ past_in = torch.cat(in_data_list, axis=2)
1358
+ past_in = past_in.reshape((B, -1))
1359
+
1360
+ # aggregate global pred_seq
1361
+ pred_seq_out = dict()
1362
+ for k in pred_global_seq[0].keys():
1363
+ pred_seq_out[k] = torch.cat(
1364
+ [pred_global_seq[i][k] for i in range(len(pred_global_seq))], axis=1
1365
+ )
1366
+
1367
+ if return_z:
1368
+ z_out_seq = torch.stack(z_out_seq, dim=1)
1369
+ pred_seq_out["z"] = z_out_seq
1370
+
1371
+ if return_prior:
1372
+ pm = torch.stack([prior_seq[i][0] for i in range(len(prior_seq))], axis=1)
1373
+ pv = torch.stack([prior_seq[i][1] for i in range(len(prior_seq))], axis=1)
1374
+ return pred_seq_out, (pm, pv)
1375
+ else:
1376
+ return pred_seq_out
1377
+
1378
+ def sample_step(
1379
+ self,
1380
+ past_in,
1381
+ t_in=None,
1382
+ use_mean=False,
1383
+ z=None,
1384
+ return_prior=False,
1385
+ return_z=False,
1386
+ ):
1387
+ """
1388
+ Given past, samples next future state by sampling from prior or posterior and decoding.
1389
+ If z (B x D) is not None, uses the given z instead of sampling from posterior or prior
1390
+
1391
+ Returns:
1392
+ - decoder_out : (B x steps_out x D) output of the decoder for the immediate next step
1393
+ """
1394
+ B = past_in.size(0)
1395
+
1396
+ pm, pv = None, None
1397
+ if t_in is not None:
1398
+ # use past and future to encode latent transition
1399
+ pm, pv = self.posterior(past_in, t_in)
1400
+ else:
1401
+ # prior
1402
+ if self.use_conditional_prior:
1403
+ # predict prior based on past
1404
+ pm, pv = self.prior(past_in)
1405
+ else:
1406
+ # use standard normal
1407
+ pm, pv = torch.zeros((B, self.latent_size)).to(past_in), torch.ones(
1408
+ (B, self.latent_size)
1409
+ ).to(past_in)
1410
+
1411
+ # sample from distrib or use mean
1412
+ if z is None:
1413
+ if not use_mean:
1414
+ z = self.rsample(pm, pv)
1415
+ else:
1416
+ z = pm # NOTE: use mean
1417
+
1418
+ # decode to get next step
1419
+ decoder_out = self.decode(z, past_in)
1420
+ decoder_out = decoder_out.reshape(
1421
+ (B, self.steps_out, -1)
1422
+ ) # B x steps_out x D_out
1423
+
1424
+ out_dict = {"decoder_out": decoder_out}
1425
+ if return_prior:
1426
+ out_dict["prior"] = (pm, pv)
1427
+ if return_z:
1428
+ out_dict["z"] = z
1429
+
1430
+ return out_dict
1431
+
1432
+ def infer_global_seq(self, global_seq, full_forward_pass=False):
1433
+ """
1434
+ Given a sequence of global states, formats it (transform each step into local frame and makde B x steps_in x D)
1435
+ and runs inference (compute prior/posterior of z for the sequence).
1436
+
1437
+ If full_forward_pass is true, does an entire forward pass at each step rather than just inference.
1438
+
1439
+ Rotations should be in in_rot_rep format.
1440
+ """
1441
+ # used to compute output zero padding
1442
+ needed_future_steps = (self.steps_out - 1) * self.out_step_size
1443
+
1444
+ prior_m_seq = []
1445
+ prior_v_seq = []
1446
+ post_m_seq = []
1447
+ post_v_seq = []
1448
+ pred_dict_seq = []
1449
+ B, T, _ = global_seq[list(global_seq.keys())[0]].size()
1450
+ J = len(SMPL_JOINTS)
1451
+ trans2joint = None
1452
+ for t in range(T - 1):
1453
+ # get world2aligned rot and translation
1454
+ world2aligned_rot = world2aligned_trans = None
1455
+
1456
+ root_orient_mat = global_seq["root_orient"][:, t, :].reshape((B, 3, 3))
1457
+ world2aligned_rot = compute_world2aligned_mat(root_orient_mat)
1458
+ world2aligned_trans = torch.cat(
1459
+ [
1460
+ -global_seq["trans"][:, t, :2],
1461
+ torch.zeros((B, 1)).to(root_orient_mat),
1462
+ ],
1463
+ axis=1,
1464
+ )
1465
+
1466
+ # compute trans2joint at first step
1467
+ if t == 0 and self.need_trans2joint:
1468
+ trans2joint = -(
1469
+ global_seq["joints"][:, t, :2] + world2aligned_trans[:, :2]
1470
+ ) # we cannot make the assumption that the first frame is already canonical
1471
+ trans2joint = torch.cat(
1472
+ [trans2joint, torch.zeros((B, 1)).to(trans2joint)], axis=1
1473
+ ).reshape((B, 1, 1, 3))
1474
+
1475
+ # get current window
1476
+ cur_data_dict = dict()
1477
+ for k in global_seq.keys():
1478
+ # get in steps
1479
+ in_sidx = max(0, t - self.steps_in + 1)
1480
+ cur_in_seq = global_seq[k][:, in_sidx : (t + 1), :]
1481
+ if cur_in_seq.size(1) < self.steps_in:
1482
+ # must zero pad front
1483
+ num_pad_steps = self.steps_in - cur_in_seq.size(1)
1484
+ cur_padding = torch.zeros(
1485
+ (cur_in_seq.size(0), num_pad_steps, cur_in_seq.size(2))
1486
+ ).to(
1487
+ cur_in_seq
1488
+ ) # assuming all data is B x T x D
1489
+ cur_in_seq = torch.cat([cur_padding, cur_in_seq], axis=1)
1490
+
1491
+ # get out steps
1492
+ cur_out_seq = global_seq[k][
1493
+ :, (t + 1) : (t + 2 + needed_future_steps) : self.out_step_size
1494
+ ]
1495
+ if cur_out_seq.size(1) < self.steps_out:
1496
+ # zero pad
1497
+ num_pad_steps = self.steps_out - cur_out_seq.size(1)
1498
+ cur_padding = torch.zeros_like(cur_out_seq[:, 0])
1499
+ cur_padding = torch.stack([cur_padding] * num_pad_steps, axis=1)
1500
+ cur_out_seq = torch.cat([cur_out_seq, cur_padding], axis=1)
1501
+ cur_data_dict[k] = torch.cat([cur_in_seq, cur_out_seq], axis=1)
1502
+
1503
+ # transform to local frame
1504
+ cur_data_dict = self.apply_world2local_trans(
1505
+ world2aligned_trans,
1506
+ world2aligned_rot,
1507
+ trans2joint,
1508
+ cur_data_dict,
1509
+ cur_data_dict,
1510
+ invert=False,
1511
+ )
1512
+
1513
+ # create x_past and x_t
1514
+ # cat all inputs together to form past_in
1515
+ in_data_list = []
1516
+ for k in self.data_names:
1517
+ in_data_list.append(cur_data_dict[k][:, : self.steps_in, :])
1518
+ x_past = torch.cat(in_data_list, axis=2)
1519
+ # cat all outputs together to form x_t
1520
+ out_data_list = []
1521
+ for k in self.data_names:
1522
+ out_data_list.append(cur_data_dict[k][:, self.steps_in :, :])
1523
+ x_t = torch.cat(out_data_list, axis=2)
1524
+
1525
+ if full_forward_pass:
1526
+ x_pred_dict = self(x_past, x_t)
1527
+ pred_dict_seq.append(x_pred_dict)
1528
+ else:
1529
+ # perform inference
1530
+ prior_z, posterior_z = self.infer(x_past, x_t)
1531
+ # save z
1532
+ prior_m_seq.append(prior_z[0])
1533
+ prior_v_seq.append(prior_z[1])
1534
+ post_m_seq.append(posterior_z[0])
1535
+ post_v_seq.append(posterior_z[1])
1536
+
1537
+ if full_forward_pass:
1538
+ # pred_dict_seq
1539
+ pred_seq_out = dict()
1540
+ for k in pred_dict_seq[0].keys():
1541
+ # print(k)
1542
+ if k == "posterior_distrib" or k == "prior_distrib":
1543
+ m = torch.stack(
1544
+ [pred_dict_seq[i][k][0] for i in range(len(pred_dict_seq))],
1545
+ axis=1,
1546
+ )
1547
+ v = torch.stack(
1548
+ [pred_dict_seq[i][k][1] for i in range(len(pred_dict_seq))],
1549
+ axis=1,
1550
+ )
1551
+ pred_seq_out[k] = (m, v)
1552
+ else:
1553
+ pred_seq_out[k] = torch.stack(
1554
+ [pred_dict_seq[i][k] for i in range(len(pred_dict_seq))], axis=1
1555
+ )
1556
+
1557
+ return pred_seq_out
1558
+ else:
1559
+ prior_m_seq = torch.stack(prior_m_seq, axis=1)
1560
+ prior_v_seq = torch.stack(prior_v_seq, axis=1)
1561
+ post_m_seq = torch.stack(post_m_seq, axis=1)
1562
+ post_v_seq = torch.stack(post_v_seq, axis=1)
1563
+
1564
+ return (prior_m_seq, prior_v_seq), (post_m_seq, post_v_seq)
1565
+
1566
+ def infer(self, x_past, x_t):
1567
+ """
1568
+ Inference (compute prior and posterior distribution of z) for a batch of single steps.
1569
+ NOTE: must do processing before passing in to ensure correct format that this function expects.
1570
+
1571
+ Input:
1572
+ - x_past (B x steps_in x D)
1573
+ - x_t (B x steps_out x D)
1574
+
1575
+ Returns:
1576
+ - prior_distrib (mu, var)
1577
+ - posterior_distrib (mu, var)
1578
+ """
1579
+
1580
+ B, _, D = x_past.size()
1581
+ past_in = x_past.reshape((B, -1))
1582
+ t_in = x_t.reshape((B, -1))
1583
+
1584
+ prior_z, posterior_z = self.infer_step(past_in, t_in)
1585
+
1586
+ return prior_z, posterior_z
1587
+
1588
+ def infer_step(self, past_in, t_in):
1589
+ """
1590
+ single step that computes both prior and posterior for training. Samples from posterior
1591
+ """
1592
+ B = past_in.size(0)
1593
+ # use past and future to encode latent transition
1594
+ qm, qv = self.posterior(past_in, t_in)
1595
+
1596
+ # prior
1597
+ pm, pv = None, None
1598
+ if self.use_conditional_prior:
1599
+ # predict prior based on past
1600
+ pm, pv = self.prior(past_in)
1601
+ else:
1602
+ # use standard normal
1603
+ pm, pv = torch.zeros_like(qm), torch.ones_like(qv)
1604
+
1605
+ return (pm, pv), (qm, qv)
1606
+
1607
+
1608
+ class MLP(nn.Module):
1609
+ def __init__(
1610
+ self,
1611
+ layers=[3, 128, 128, 3],
1612
+ nonlinearity=nn.ReLU,
1613
+ use_gn=True,
1614
+ skip_input_idx=None,
1615
+ ):
1616
+ """
1617
+ If skip_input_idx is not None, the input feature after idx skip_input_idx will be skip connected to every later of the MLP.
1618
+ """
1619
+ super(MLP, self).__init__()
1620
+
1621
+ in_size = layers[0]
1622
+ out_channels = layers[1:]
1623
+
1624
+ # input layer
1625
+ layers = []
1626
+ layers.append(nn.Linear(in_size, out_channels[0]))
1627
+ skip_size = 0 if skip_input_idx is None else (in_size - skip_input_idx)
1628
+ # now the rest
1629
+ for layer_idx in range(1, len(out_channels)):
1630
+ fc_layer = nn.Linear(
1631
+ out_channels[layer_idx - 1] + skip_size, out_channels[layer_idx]
1632
+ )
1633
+ if use_gn:
1634
+ bn_layer = nn.GroupNorm(16, out_channels[layer_idx - 1])
1635
+ layers.append(bn_layer)
1636
+ layers.extend([nonlinearity(), fc_layer])
1637
+ self.net = nn.ModuleList(layers)
1638
+ self.skip_input_idx = skip_input_idx
1639
+
1640
+ def forward(self, x):
1641
+ """
1642
+ B x D x * : batch norm done over dim D
1643
+ """
1644
+ skip_in = None
1645
+ if self.skip_input_idx is not None:
1646
+ skip_in = x[:, self.skip_input_idx :]
1647
+ for i, layer in enumerate(self.net):
1648
+ if (
1649
+ self.skip_input_idx is not None
1650
+ and i > 0
1651
+ and isinstance(layer, nn.Linear)
1652
+ ):
1653
+ x = torch.cat([x, skip_in], dim=1)
1654
+ x = layer(x)
1655
+ return x
slahmr/slahmr/humor/transforms.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Taken from https://github.com/davrempe/humor
3
+ """
4
+
5
+ import copy
6
+
7
+ import torch
8
+ import numpy as np
9
+ from torch.nn import functional as F
10
+
11
+ from body_model.utils import SMPL_JOINTS
12
+
13
+ #
14
+ # For computing local body frame
15
+ #
16
+
17
+ GLOB_DEVICE = (
18
+ torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
19
+ )
20
+ XY_AXIS_GLOB = torch.Tensor([[1.0, 1.0, 0.0]]).to(device=GLOB_DEVICE)
21
+ X_AXIS_GLOB = torch.Tensor([[1.0, 0.0, 0.0]]).to(device=GLOB_DEVICE)
22
+
23
+
24
+ def compute_aligned_from_right(body_right):
25
+ xy_axis = XY_AXIS_GLOB
26
+ x_axis = X_AXIS_GLOB
27
+
28
+ body_right_x_proj = body_right[:, 0:1] / (
29
+ torch.norm(body_right[:, :2], dim=1, keepdim=True) + 1e-6
30
+ )
31
+ body_right_x_proj = torch.clamp(
32
+ body_right_x_proj, min=-1.0, max=1.0
33
+ ) # avoid acos error
34
+
35
+ world2aligned_angle = torch.acos(
36
+ body_right_x_proj
37
+ ) # project to world x axis, and compute angle
38
+ body_right = body_right * xy_axis
39
+ world2aligned_axis = torch.linalg.cross(body_right, x_axis.expand_as(body_right))
40
+
41
+ world2aligned_aa = (
42
+ world2aligned_axis
43
+ / (torch.norm(world2aligned_axis, dim=1, keepdim=True) + 1e-6)
44
+ ) * world2aligned_angle
45
+ world2aligned_mat = batch_rodrigues(world2aligned_aa)
46
+
47
+ return world2aligned_mat, world2aligned_aa
48
+
49
+
50
+ def compute_world2aligned_mat(rot_pos):
51
+ """
52
+ batch of world rotation matrices: B x 3 x 3
53
+ returns rot mats that align the inputs to the forward direction: B x 3 x 3
54
+ Torch version
55
+ """
56
+ body_right = -rot_pos[:, :, 0] # .clone() # in body coordinates body x-axis is left
57
+
58
+ world2aligned_mat, world2aligned_aa = compute_aligned_from_right(body_right)
59
+ return world2aligned_mat
60
+
61
+
62
+ def compute_world2aligned_joints_mat(joints):
63
+ """
64
+ Compute world to canonical frame (rotation around up axis)
65
+ from the given batch of joints (B x J x 3)
66
+ """
67
+ left_idx = SMPL_JOINTS["leftUpLeg"]
68
+ right_idx = SMPL_JOINTS["rightUpLeg"]
69
+
70
+ body_right = joints[:, right_idx] - joints[:, left_idx]
71
+ body_right = body_right / torch.norm(body_right, dim=1, keepdim=True)
72
+
73
+ world2aligned_mat, world2aligned_aa = compute_aligned_from_right(body_right)
74
+
75
+ return world2aligned_mat
76
+
77
+
78
+ def convert_to_rotmat(pred_rot, rep="aa"):
79
+ """
80
+ Converts rotation rep to rotation matrix based on the given type.
81
+ pred_rot : B x T x N
82
+ """
83
+ B, T, _ = pred_rot.size()
84
+ pred_rot_mat = None
85
+ if rep == "aa":
86
+ pred_rot_mat = batch_rodrigues(pred_rot.reshape(-1, 3))
87
+ elif rep == "6d":
88
+ pred_rot_mat = rot6d_to_rotmat(pred_rot.reshape(-1, 6))
89
+ elif rep == "9d":
90
+ pred_rot_mat = rot9d_to_rotmat(pred_rot.reshape(-1, 9))
91
+ return pred_rot_mat.reshape((B, T, -1))
92
+
93
+
94
+ #
95
+ # Many of these functions taken from https://github.com/mkocabas/VIBE/blob/a859e45a907379aa2fba65a7b620b4a2d65dcf1b/lib/utils/geometry.py
96
+ # Please see their license for usage restrictions.
97
+ #
98
+
99
+
100
+ def matrot2axisangle(matrots):
101
+ """
102
+ :param matrots: N*num_joints*9
103
+ :return: N*num_joints*3
104
+ """
105
+ import cv2
106
+
107
+ batch_size = matrots.shape[0]
108
+ matrots = matrots.reshape([batch_size, -1, 9])
109
+ out_axisangle = []
110
+ for mIdx in range(matrots.shape[0]):
111
+ cur_axisangle = []
112
+ for jIdx in range(matrots.shape[1]):
113
+ a = cv2.Rodrigues(matrots[mIdx, jIdx : jIdx + 1, :].reshape(3, 3))[
114
+ 0
115
+ ].reshape((1, 3))
116
+ cur_axisangle.append(a)
117
+
118
+ out_axisangle.append(np.array(cur_axisangle).reshape([1, -1, 3]))
119
+ return np.vstack(out_axisangle)
120
+
121
+
122
+ def axisangle2matrots(axisangle):
123
+ """
124
+ :param axisangle: N*num_joints*3
125
+ :return: N*num_joints*9
126
+ """
127
+ import cv2
128
+
129
+ batch_size = axisangle.shape[0]
130
+ axisangle = axisangle.reshape([batch_size, -1, 3])
131
+ out_matrot = []
132
+ for mIdx in range(axisangle.shape[0]):
133
+ cur_axisangle = []
134
+ for jIdx in range(axisangle.shape[1]):
135
+ a = cv2.Rodrigues(axisangle[mIdx, jIdx : jIdx + 1, :].reshape(1, 3))[0]
136
+ cur_axisangle.append(a)
137
+
138
+ out_matrot.append(np.array(cur_axisangle).reshape([1, -1, 9]))
139
+ return np.vstack(out_matrot)
140
+
141
+
142
+ def make_rot_homog(rotation_matrix):
143
+ if rotation_matrix.shape[1:] == (3, 3):
144
+ rot_mat = rotation_matrix.reshape(-1, 3, 3)
145
+ hom = (
146
+ torch.tensor([0, 0, 1], dtype=torch.float32, device=rotation_matrix.device)
147
+ .reshape(1, 3, 1)
148
+ .expand(rot_mat.shape[0], -1, -1)
149
+ )
150
+ rotation_matrix = torch.cat([rot_mat, hom], dim=-1)
151
+ return rotation_matrix
152
+
153
+
154
+ def skew(v):
155
+ """
156
+ Returns skew symmetric (B x 3 x 3) mat from vector v: B x 3
157
+ """
158
+ B, D = v.size()
159
+ assert D == 3
160
+ skew_mat = torch.zeros((B, 3, 3)).to(v)
161
+ skew_mat[:, 0, 1] = v[:, 2]
162
+ skew_mat[:, 1, 0] = -v[:, 2]
163
+ skew_mat[:, 0, 2] = v[:, 1]
164
+ skew_mat[:, 2, 0] = -v[:, 1]
165
+ skew_mat[:, 1, 2] = v[:, 0]
166
+ skew_mat[:, 2, 1] = -v[:, 0]
167
+ return skew_mat
168
+
169
+
170
+ def batch_rodrigues(rot_vecs, epsilon=1e-8, dtype=torch.float32):
171
+ """Calculates the rotation matrices for a batch of rotation vectors
172
+ Parameters
173
+ ----------
174
+ rot_vecs: torch.tensor Nx3
175
+ array of N axis-angle vectors
176
+ Returns
177
+ -------
178
+ R: torch.tensor Nx3x3
179
+ The rotation matrices for the given axis-angle parameters
180
+ """
181
+
182
+ batch_size = rot_vecs.shape[0]
183
+ device = rot_vecs.device
184
+
185
+ angle = torch.norm(rot_vecs + 1e-8, dim=1, keepdim=True)
186
+ rot_dir = rot_vecs / angle
187
+
188
+ cos = torch.unsqueeze(torch.cos(angle), dim=1)
189
+ sin = torch.unsqueeze(torch.sin(angle), dim=1)
190
+
191
+ # Bx1 arrays
192
+ rx, ry, rz = torch.split(rot_dir, 1, dim=1)
193
+ K = torch.zeros((batch_size, 3, 3), dtype=dtype, device=device)
194
+
195
+ zeros = torch.zeros((batch_size, 1), dtype=dtype, device=device)
196
+ K = torch.cat([zeros, -rz, ry, rz, zeros, -rx, -ry, rx, zeros], dim=1).view(
197
+ (batch_size, 3, 3)
198
+ )
199
+
200
+ ident = torch.eye(3, dtype=dtype, device=device).unsqueeze(dim=0)
201
+ rot_mat = ident + sin * K + (1 - cos) * torch.bmm(K, K)
202
+ return rot_mat
203
+
204
+
205
+ def quat2mat(quat):
206
+ """
207
+ This function is borrowed from https://github.com/MandyMo/pytorch_HMR/blob/master/src/util.py#L50
208
+ Convert quaternion coefficients to rotation matrix.
209
+ Args:
210
+ quat: size = [batch_size, 4] 4 <===>(w, x, y, z)
211
+ Returns:
212
+ Rotation matrix corresponding to the quaternion -- size = [batch_size, 3, 3]
213
+ """
214
+ norm_quat = quat
215
+ norm_quat = norm_quat / norm_quat.norm(p=2, dim=1, keepdim=True)
216
+ w, x, y, z = norm_quat[:, 0], norm_quat[:, 1], norm_quat[:, 2], norm_quat[:, 3]
217
+
218
+ batch_size = quat.size(0)
219
+
220
+ w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
221
+ wx, wy, wz = w * x, w * y, w * z
222
+ xy, xz, yz = x * y, x * z, y * z
223
+
224
+ rotMat = torch.stack(
225
+ [
226
+ w2 + x2 - y2 - z2,
227
+ 2 * xy - 2 * wz,
228
+ 2 * wy + 2 * xz,
229
+ 2 * wz + 2 * xy,
230
+ w2 - x2 + y2 - z2,
231
+ 2 * yz - 2 * wx,
232
+ 2 * xz - 2 * wy,
233
+ 2 * wx + 2 * yz,
234
+ w2 - x2 - y2 + z2,
235
+ ],
236
+ dim=1,
237
+ ).view(batch_size, 3, 3)
238
+ return rotMat
239
+
240
+
241
+ def rot6d_to_rotmat(x):
242
+ """Convert 6D rotation representation to 3x3 rotation matrix.
243
+ Based on Zhou et al., "On the Continuity of Rotation Representations in Neural Networks", CVPR 2019
244
+ Input:
245
+ (B,6) Batch of 6-D rotation representations
246
+ Output:
247
+ (B,3,3) Batch of corresponding rotation matrices
248
+ """
249
+ x = x.view(-1, 3, 2)
250
+ a1 = x[:, :, 0]
251
+ a2 = x[:, :, 1]
252
+ b1 = F.normalize(a1)
253
+ b2 = F.normalize(a2 - torch.einsum("bi,bi->b", b1, a2).unsqueeze(-1) * b1)
254
+
255
+ # inp = a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1
256
+ # denom = inp.pow(2).sum(dim=1).sqrt().unsqueeze(-1) + 1e-8
257
+ # b2 = inp / denom
258
+
259
+ b3 = torch.linalg.cross(b1, b2)
260
+ return torch.stack((b1, b2, b3), dim=-1)
261
+
262
+
263
+ def rot9d_to_rotmat(x):
264
+ """
265
+ Converts 9D rotation output to valid 3x3 rotation amtrix.
266
+ Based on Levinson et al., An Analysis of SVD for Deep Rotation Estimation.
267
+
268
+ Input:
269
+ (B, 9)
270
+ Output:
271
+ (B, 9)
272
+ """
273
+ B = x.size()[0]
274
+ x = x.reshape((B, 3, 3))
275
+ u, s, v = torch.svd(x)
276
+
277
+ v_T = v.transpose(-2, -1)
278
+ s_p = torch.eye(3).to(x).reshape((1, 3, 3)).expand_as(x).clone()
279
+ s_p[:, 2, 2] = torch.det(torch.matmul(u, v_T))
280
+ x_out = torch.matmul(torch.matmul(u, s_p), v_T)
281
+
282
+ return x_out.reshape((B, 9))
283
+
284
+
285
+ def rotation_matrix_to_angle_axis(rotation_matrix):
286
+ """
287
+ This function is borrowed from https://github.com/kornia/kornia
288
+ Convert 3x4 rotation matrix to Rodrigues vector
289
+ Args:
290
+ rotation_matrix (Tensor): rotation matrix.
291
+ Returns:
292
+ Tensor: Rodrigues vector transformation.
293
+ Shape:
294
+ - Input: :math:`(N, 3, 4)`
295
+ - Output: :math:`(N, 3)`
296
+ Example:
297
+ >>> input = torch.rand(2, 3, 4) # Nx4x4
298
+ >>> output = tgm.rotation_matrix_to_angle_axis(input) # Nx3
299
+ """
300
+ if rotation_matrix.shape[1:] == (3, 3):
301
+ rot_mat = rotation_matrix.reshape(-1, 3, 3)
302
+ hom = (
303
+ torch.tensor([0, 0, 1], dtype=torch.float32, device=rotation_matrix.device)
304
+ .reshape(1, 3, 1)
305
+ .expand(rot_mat.shape[0], -1, -1)
306
+ )
307
+ rotation_matrix = torch.cat([rot_mat, hom], dim=-1)
308
+
309
+ quaternion = rotation_matrix_to_quaternion(rotation_matrix)
310
+ aa = quaternion_to_angle_axis(quaternion)
311
+ aa[torch.isnan(aa)] = 0.0
312
+ return aa
313
+
314
+
315
+ def rotation_matrix_to_quaternion(rotation_matrix, eps=1e-6):
316
+ """
317
+ This function is borrowed from https://github.com/kornia/kornia
318
+ Convert 3x4 rotation matrix to 4d quaternion vector
319
+ This algorithm is based on algorithm described in
320
+ https://github.com/KieranWynn/pyquaternion/blob/master/pyquaternion/quaternion.py#L201
321
+ Args:
322
+ rotation_matrix (Tensor): the rotation matrix to convert.
323
+ Return:
324
+ Tensor: the rotation in quaternion
325
+ Shape:
326
+ - Input: :math:`(N, 3, 4)`
327
+ - Output: :math:`(N, 4)`
328
+ Example:
329
+ >>> input = torch.rand(4, 3, 4) # Nx3x4
330
+ >>> output = tgm.rotation_matrix_to_quaternion(input) # Nx4
331
+ """
332
+ if not torch.is_tensor(rotation_matrix):
333
+ raise TypeError(
334
+ "Input type is not a torch.Tensor. Got {}".format(type(rotation_matrix))
335
+ )
336
+
337
+ if len(rotation_matrix.shape) > 3:
338
+ raise ValueError(
339
+ "Input size must be a three dimensional tensor. Got {}".format(
340
+ rotation_matrix.shape
341
+ )
342
+ )
343
+ if not rotation_matrix.shape[-2:] == (3, 4):
344
+ raise ValueError(
345
+ "Input size must be a N x 3 x 4 tensor. Got {}".format(
346
+ rotation_matrix.shape
347
+ )
348
+ )
349
+
350
+ rmat_t = torch.transpose(rotation_matrix, 1, 2)
351
+
352
+ mask_d2 = rmat_t[:, 2, 2] < eps
353
+
354
+ mask_d0_d1 = rmat_t[:, 0, 0] > rmat_t[:, 1, 1]
355
+ mask_d0_nd1 = rmat_t[:, 0, 0] < -rmat_t[:, 1, 1]
356
+
357
+ t0 = 1 + rmat_t[:, 0, 0] - rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
358
+ q0 = torch.stack(
359
+ [
360
+ rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
361
+ t0,
362
+ rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
363
+ rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
364
+ ],
365
+ -1,
366
+ )
367
+ t0_rep = t0.repeat(4, 1).t()
368
+
369
+ t1 = 1 - rmat_t[:, 0, 0] + rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
370
+ q1 = torch.stack(
371
+ [
372
+ rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
373
+ rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
374
+ t1,
375
+ rmat_t[:, 1, 2] + rmat_t[:, 2, 1],
376
+ ],
377
+ -1,
378
+ )
379
+ t1_rep = t1.repeat(4, 1).t()
380
+
381
+ t2 = 1 - rmat_t[:, 0, 0] - rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
382
+ q2 = torch.stack(
383
+ [
384
+ rmat_t[:, 0, 1] - rmat_t[:, 1, 0],
385
+ rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
386
+ rmat_t[:, 1, 2] + rmat_t[:, 2, 1],
387
+ t2,
388
+ ],
389
+ -1,
390
+ )
391
+ t2_rep = t2.repeat(4, 1).t()
392
+
393
+ t3 = 1 + rmat_t[:, 0, 0] + rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
394
+ q3 = torch.stack(
395
+ [
396
+ t3,
397
+ rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
398
+ rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
399
+ rmat_t[:, 0, 1] - rmat_t[:, 1, 0],
400
+ ],
401
+ -1,
402
+ )
403
+ t3_rep = t3.repeat(4, 1).t()
404
+
405
+ mask_c0 = mask_d2 * mask_d0_d1
406
+ mask_c1 = mask_d2 * ~mask_d0_d1
407
+ mask_c2 = ~mask_d2 * mask_d0_nd1
408
+ mask_c3 = ~mask_d2 * ~mask_d0_nd1
409
+ mask_c0 = mask_c0.view(-1, 1).type_as(q0)
410
+ mask_c1 = mask_c1.view(-1, 1).type_as(q1)
411
+ mask_c2 = mask_c2.view(-1, 1).type_as(q2)
412
+ mask_c3 = mask_c3.view(-1, 1).type_as(q3)
413
+
414
+ q = q0 * mask_c0 + q1 * mask_c1 + q2 * mask_c2 + q3 * mask_c3
415
+ q /= torch.sqrt(
416
+ t0_rep * mask_c0
417
+ + t1_rep * mask_c1
418
+ + t2_rep * mask_c2 # noqa
419
+ + t3_rep * mask_c3
420
+ ) # noqa
421
+ q *= 0.5
422
+ return q
423
+
424
+
425
+ def quaternion_to_angle_axis(quaternion: torch.Tensor) -> torch.Tensor:
426
+ """
427
+ This function is borrowed from https://github.com/kornia/kornia
428
+ Convert quaternion vector to angle axis of rotation.
429
+ Adapted from ceres C++ library: ceres-solver/include/ceres/rotation.h
430
+ Args:
431
+ quaternion (torch.Tensor): tensor with quaternions.
432
+ Return:
433
+ torch.Tensor: tensor with angle axis of rotation.
434
+ Shape:
435
+ - Input: :math:`(*, 4)` where `*` means, any number of dimensions
436
+ - Output: :math:`(*, 3)`
437
+ Example:
438
+ >>> quaternion = torch.rand(2, 4) # Nx4
439
+ >>> angle_axis = tgm.quaternion_to_angle_axis(quaternion) # Nx3
440
+ """
441
+ if not torch.is_tensor(quaternion):
442
+ raise TypeError(
443
+ "Input type is not a torch.Tensor. Got {}".format(type(quaternion))
444
+ )
445
+
446
+ if not quaternion.shape[-1] == 4:
447
+ raise ValueError(
448
+ "Input must be a tensor of shape Nx4 or 4. Got {}".format(quaternion.shape)
449
+ )
450
+ # unpack input and compute conversion
451
+ q1: torch.Tensor = quaternion[..., 1]
452
+ q2: torch.Tensor = quaternion[..., 2]
453
+ q3: torch.Tensor = quaternion[..., 3]
454
+ sin_squared_theta: torch.Tensor = q1 * q1 + q2 * q2 + q3 * q3
455
+
456
+ sin_theta: torch.Tensor = torch.sqrt(sin_squared_theta)
457
+ cos_theta: torch.Tensor = quaternion[..., 0]
458
+ two_theta: torch.Tensor = 2.0 * torch.where(
459
+ cos_theta < 0.0,
460
+ torch.atan2(-sin_theta, -cos_theta),
461
+ torch.atan2(sin_theta, cos_theta),
462
+ )
463
+
464
+ k_pos: torch.Tensor = two_theta / sin_theta
465
+ k_neg: torch.Tensor = 2.0 * torch.ones_like(sin_theta)
466
+ k: torch.Tensor = torch.where(sin_squared_theta > 0.0, k_pos, k_neg)
467
+
468
+ angle_axis: torch.Tensor = torch.zeros_like(quaternion)[..., :3]
469
+ angle_axis[..., 0] += q1 * k
470
+ angle_axis[..., 1] += q2 * k
471
+ angle_axis[..., 2] += q3 * k
472
+ return angle_axis
slahmr/slahmr/job_specs/3dpw_test_split.txt ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ downtown_arguing_00 data.start_idx=0 data.end_idx=100
2
+ downtown_arguing_00 data.start_idx=100 data.end_idx=200
3
+ downtown_arguing_00 data.start_idx=200 data.end_idx=300
4
+ downtown_arguing_00 data.start_idx=300 data.end_idx=400
5
+ downtown_arguing_00 data.start_idx=400 data.end_idx=500
6
+ downtown_arguing_00 data.start_idx=500 data.end_idx=600
7
+ downtown_arguing_00 data.start_idx=600 data.end_idx=700
8
+ downtown_arguing_00 data.start_idx=700 data.end_idx=898
9
+ downtown_bar_00 data.start_idx=0 data.end_idx=100
10
+ downtown_bar_00 data.start_idx=100 data.end_idx=200
11
+ downtown_bar_00 data.start_idx=200 data.end_idx=300
12
+ downtown_bar_00 data.start_idx=300 data.end_idx=400
13
+ downtown_bar_00 data.start_idx=400 data.end_idx=500
14
+ downtown_bar_00 data.start_idx=500 data.end_idx=600
15
+ downtown_bar_00 data.start_idx=600 data.end_idx=700
16
+ downtown_bar_00 data.start_idx=700 data.end_idx=800
17
+ downtown_bar_00 data.start_idx=800 data.end_idx=900
18
+ downtown_bar_00 data.start_idx=900 data.end_idx=1000
19
+ downtown_bar_00 data.start_idx=1000 data.end_idx=1100
20
+ downtown_bar_00 data.start_idx=1100 data.end_idx=1200
21
+ downtown_bar_00 data.start_idx=1200 data.end_idx=1300
22
+ downtown_bar_00 data.start_idx=1300 data.end_idx=1403
23
+ downtown_bus_00 data.start_idx=0 data.end_idx=100
24
+ downtown_bus_00 data.start_idx=100 data.end_idx=200
25
+ downtown_bus_00 data.start_idx=200 data.end_idx=300
26
+ downtown_bus_00 data.start_idx=300 data.end_idx=400
27
+ downtown_bus_00 data.start_idx=400 data.end_idx=500
28
+ downtown_bus_00 data.start_idx=500 data.end_idx=600
29
+ downtown_bus_00 data.start_idx=600 data.end_idx=700
30
+ downtown_bus_00 data.start_idx=700 data.end_idx=800
31
+ downtown_bus_00 data.start_idx=800 data.end_idx=900
32
+ downtown_bus_00 data.start_idx=900 data.end_idx=1000
33
+ downtown_bus_00 data.start_idx=1000 data.end_idx=1100
34
+ downtown_bus_00 data.start_idx=1100 data.end_idx=1200
35
+ downtown_bus_00 data.start_idx=1200 data.end_idx=1300
36
+ downtown_bus_00 data.start_idx=1300 data.end_idx=1400
37
+ downtown_bus_00 data.start_idx=1400 data.end_idx=1500
38
+ downtown_bus_00 data.start_idx=1500 data.end_idx=1600
39
+ downtown_bus_00 data.start_idx=1600 data.end_idx=1700
40
+ downtown_bus_00 data.start_idx=1700 data.end_idx=1800
41
+ downtown_bus_00 data.start_idx=1800 data.end_idx=1900
42
+ downtown_bus_00 data.start_idx=1900 data.end_idx=2000
43
+ downtown_bus_00 data.start_idx=2000 data.end_idx=2178
44
+ downtown_cafe_00 data.start_idx=0 data.end_idx=100
45
+ downtown_cafe_00 data.start_idx=100 data.end_idx=200
46
+ downtown_cafe_00 data.start_idx=200 data.end_idx=300
47
+ downtown_cafe_00 data.start_idx=300 data.end_idx=400
48
+ downtown_cafe_00 data.start_idx=400 data.end_idx=500
49
+ downtown_cafe_00 data.start_idx=500 data.end_idx=600
50
+ downtown_cafe_00 data.start_idx=600 data.end_idx=700
51
+ downtown_cafe_00 data.start_idx=700 data.end_idx=800
52
+ downtown_cafe_00 data.start_idx=800 data.end_idx=900
53
+ downtown_cafe_00 data.start_idx=900 data.end_idx=1000
54
+ downtown_cafe_00 data.start_idx=1000 data.end_idx=1100
55
+ downtown_cafe_00 data.start_idx=1100 data.end_idx=1201
56
+ downtown_car_00 data.start_idx=0 data.end_idx=100
57
+ downtown_car_00 data.start_idx=100 data.end_idx=200
58
+ downtown_car_00 data.start_idx=200 data.end_idx=300
59
+ downtown_car_00 data.start_idx=300 data.end_idx=400
60
+ downtown_car_00 data.start_idx=400 data.end_idx=500
61
+ downtown_car_00 data.start_idx=500 data.end_idx=600
62
+ downtown_car_00 data.start_idx=600 data.end_idx=700
63
+ downtown_car_00 data.start_idx=700 data.end_idx=800
64
+ downtown_car_00 data.start_idx=800 data.end_idx=900
65
+ downtown_car_00 data.start_idx=900 data.end_idx=1020
66
+ downtown_crossStreets_00 data.start_idx=0 data.end_idx=100
67
+ downtown_crossStreets_00 data.start_idx=100 data.end_idx=200
68
+ downtown_crossStreets_00 data.start_idx=200 data.end_idx=300
69
+ downtown_crossStreets_00 data.start_idx=300 data.end_idx=400
70
+ downtown_crossStreets_00 data.start_idx=400 data.end_idx=588
71
+ downtown_downstairs_00 data.start_idx=0 data.end_idx=100
72
+ downtown_downstairs_00 data.start_idx=100 data.end_idx=200
73
+ downtown_downstairs_00 data.start_idx=200 data.end_idx=300
74
+ downtown_downstairs_00 data.start_idx=300 data.end_idx=400
75
+ downtown_downstairs_00 data.start_idx=400 data.end_idx=500
76
+ downtown_downstairs_00 data.start_idx=500 data.end_idx=600
77
+ downtown_downstairs_00 data.start_idx=600 data.end_idx=700
78
+ downtown_downstairs_00 data.start_idx=700 data.end_idx=857
79
+ downtown_enterShop_00 data.start_idx=0 data.end_idx=100
80
+ downtown_enterShop_00 data.start_idx=100 data.end_idx=200
81
+ downtown_enterShop_00 data.start_idx=200 data.end_idx=300
82
+ downtown_enterShop_00 data.start_idx=300 data.end_idx=400
83
+ downtown_enterShop_00 data.start_idx=400 data.end_idx=500
84
+ downtown_enterShop_00 data.start_idx=500 data.end_idx=600
85
+ downtown_enterShop_00 data.start_idx=600 data.end_idx=700
86
+ downtown_enterShop_00 data.start_idx=700 data.end_idx=800
87
+ downtown_enterShop_00 data.start_idx=800 data.end_idx=900
88
+ downtown_enterShop_00 data.start_idx=900 data.end_idx=1000
89
+ downtown_enterShop_00 data.start_idx=1000 data.end_idx=1100
90
+ downtown_enterShop_00 data.start_idx=1100 data.end_idx=1200
91
+ downtown_enterShop_00 data.start_idx=1200 data.end_idx=1300
92
+ downtown_enterShop_00 data.start_idx=1300 data.end_idx=1449
93
+ downtown_rampAndStairs_00 data.start_idx=0 data.end_idx=100
94
+ downtown_rampAndStairs_00 data.start_idx=100 data.end_idx=200
95
+ downtown_rampAndStairs_00 data.start_idx=200 data.end_idx=300
96
+ downtown_rampAndStairs_00 data.start_idx=300 data.end_idx=400
97
+ downtown_rampAndStairs_00 data.start_idx=400 data.end_idx=500
98
+ downtown_rampAndStairs_00 data.start_idx=500 data.end_idx=600
99
+ downtown_rampAndStairs_00 data.start_idx=600 data.end_idx=700
100
+ downtown_rampAndStairs_00 data.start_idx=700 data.end_idx=800
101
+ downtown_rampAndStairs_00 data.start_idx=800 data.end_idx=984
102
+ downtown_runForBus_00 data.start_idx=0 data.end_idx=100
103
+ downtown_runForBus_00 data.start_idx=100 data.end_idx=200
104
+ downtown_runForBus_00 data.start_idx=200 data.end_idx=300
105
+ downtown_runForBus_00 data.start_idx=300 data.end_idx=400
106
+ downtown_runForBus_00 data.start_idx=400 data.end_idx=500
107
+ downtown_runForBus_00 data.start_idx=500 data.end_idx=600
108
+ downtown_runForBus_00 data.start_idx=600 data.end_idx=731
109
+ downtown_runForBus_01 data.start_idx=0 data.end_idx=100
110
+ downtown_runForBus_01 data.start_idx=100 data.end_idx=200
111
+ downtown_runForBus_01 data.start_idx=200 data.end_idx=300
112
+ downtown_runForBus_01 data.start_idx=300 data.end_idx=400
113
+ downtown_runForBus_01 data.start_idx=400 data.end_idx=500
114
+ downtown_runForBus_01 data.start_idx=500 data.end_idx=600
115
+ downtown_runForBus_01 data.start_idx=600 data.end_idx=783
116
+ downtown_sitOnStairs_00 data.start_idx=0 data.end_idx=100
117
+ downtown_sitOnStairs_00 data.start_idx=100 data.end_idx=200
118
+ downtown_sitOnStairs_00 data.start_idx=200 data.end_idx=300
119
+ downtown_sitOnStairs_00 data.start_idx=300 data.end_idx=400
120
+ downtown_sitOnStairs_00 data.start_idx=400 data.end_idx=500
121
+ downtown_sitOnStairs_00 data.start_idx=500 data.end_idx=600
122
+ downtown_sitOnStairs_00 data.start_idx=600 data.end_idx=700
123
+ downtown_sitOnStairs_00 data.start_idx=700 data.end_idx=800
124
+ downtown_sitOnStairs_00 data.start_idx=800 data.end_idx=900
125
+ downtown_sitOnStairs_00 data.start_idx=900 data.end_idx=1000
126
+ downtown_sitOnStairs_00 data.start_idx=1000 data.end_idx=1100
127
+ downtown_sitOnStairs_00 data.start_idx=1100 data.end_idx=1200
128
+ downtown_sitOnStairs_00 data.start_idx=1200 data.end_idx=1337
129
+ downtown_stairs_00 data.start_idx=0 data.end_idx=100
130
+ downtown_stairs_00 data.start_idx=100 data.end_idx=200
131
+ downtown_stairs_00 data.start_idx=200 data.end_idx=300
132
+ downtown_stairs_00 data.start_idx=300 data.end_idx=400
133
+ downtown_stairs_00 data.start_idx=400 data.end_idx=500
134
+ downtown_stairs_00 data.start_idx=500 data.end_idx=600
135
+ downtown_stairs_00 data.start_idx=600 data.end_idx=700
136
+ downtown_stairs_00 data.start_idx=700 data.end_idx=800
137
+ downtown_stairs_00 data.start_idx=800 data.end_idx=900
138
+ downtown_stairs_00 data.start_idx=900 data.end_idx=1000
139
+ downtown_stairs_00 data.start_idx=1000 data.end_idx=1100
140
+ downtown_stairs_00 data.start_idx=1100 data.end_idx=1240
141
+ downtown_upstairs_00 data.start_idx=0 data.end_idx=100
142
+ downtown_upstairs_00 data.start_idx=100 data.end_idx=200
143
+ downtown_upstairs_00 data.start_idx=200 data.end_idx=300
144
+ downtown_upstairs_00 data.start_idx=300 data.end_idx=400
145
+ downtown_upstairs_00 data.start_idx=400 data.end_idx=500
146
+ downtown_upstairs_00 data.start_idx=500 data.end_idx=600
147
+ downtown_upstairs_00 data.start_idx=600 data.end_idx=700
148
+ downtown_upstairs_00 data.start_idx=700 data.end_idx=845
149
+ downtown_walkBridge_01 data.start_idx=0 data.end_idx=100
150
+ downtown_walkBridge_01 data.start_idx=100 data.end_idx=200
151
+ downtown_walkBridge_01 data.start_idx=200 data.end_idx=300
152
+ downtown_walkBridge_01 data.start_idx=300 data.end_idx=400
153
+ downtown_walkBridge_01 data.start_idx=400 data.end_idx=500
154
+ downtown_walkBridge_01 data.start_idx=500 data.end_idx=600
155
+ downtown_walkBridge_01 data.start_idx=600 data.end_idx=700
156
+ downtown_walkBridge_01 data.start_idx=700 data.end_idx=800
157
+ downtown_walkBridge_01 data.start_idx=800 data.end_idx=900
158
+ downtown_walkBridge_01 data.start_idx=900 data.end_idx=1000
159
+ downtown_walkBridge_01 data.start_idx=1000 data.end_idx=1100
160
+ downtown_walkBridge_01 data.start_idx=1100 data.end_idx=1200
161
+ downtown_walkBridge_01 data.start_idx=1200 data.end_idx=1372
162
+ downtown_walkUphill_00 data.start_idx=0 data.end_idx=100
163
+ downtown_walkUphill_00 data.start_idx=100 data.end_idx=200
164
+ downtown_walkUphill_00 data.start_idx=200 data.end_idx=388
165
+ downtown_walking_00 data.start_idx=0 data.end_idx=100
166
+ downtown_walking_00 data.start_idx=100 data.end_idx=200
167
+ downtown_walking_00 data.start_idx=200 data.end_idx=300
168
+ downtown_walking_00 data.start_idx=300 data.end_idx=400
169
+ downtown_walking_00 data.start_idx=400 data.end_idx=500
170
+ downtown_walking_00 data.start_idx=500 data.end_idx=600
171
+ downtown_walking_00 data.start_idx=600 data.end_idx=700
172
+ downtown_walking_00 data.start_idx=700 data.end_idx=800
173
+ downtown_walking_00 data.start_idx=800 data.end_idx=900
174
+ downtown_walking_00 data.start_idx=900 data.end_idx=1000
175
+ downtown_walking_00 data.start_idx=1000 data.end_idx=1100
176
+ downtown_walking_00 data.start_idx=1100 data.end_idx=1200
177
+ downtown_walking_00 data.start_idx=1200 data.end_idx=1387
178
+ downtown_warmWelcome_00 data.start_idx=0 data.end_idx=100
179
+ downtown_warmWelcome_00 data.start_idx=100 data.end_idx=200
180
+ downtown_warmWelcome_00 data.start_idx=200 data.end_idx=300
181
+ downtown_warmWelcome_00 data.start_idx=300 data.end_idx=400
182
+ downtown_warmWelcome_00 data.start_idx=400 data.end_idx=589
183
+ downtown_weeklyMarket_00 data.start_idx=0 data.end_idx=100
184
+ downtown_weeklyMarket_00 data.start_idx=100 data.end_idx=200
185
+ downtown_weeklyMarket_00 data.start_idx=200 data.end_idx=300
186
+ downtown_weeklyMarket_00 data.start_idx=300 data.end_idx=400
187
+ downtown_weeklyMarket_00 data.start_idx=400 data.end_idx=500
188
+ downtown_weeklyMarket_00 data.start_idx=500 data.end_idx=600
189
+ downtown_weeklyMarket_00 data.start_idx=600 data.end_idx=700
190
+ downtown_weeklyMarket_00 data.start_idx=700 data.end_idx=800
191
+ downtown_weeklyMarket_00 data.start_idx=800 data.end_idx=900
192
+ downtown_weeklyMarket_00 data.start_idx=900 data.end_idx=1000
193
+ downtown_weeklyMarket_00 data.start_idx=1000 data.end_idx=1193
194
+ downtown_windowShopping_00 data.start_idx=0 data.end_idx=100
195
+ downtown_windowShopping_00 data.start_idx=100 data.end_idx=200
196
+ downtown_windowShopping_00 data.start_idx=200 data.end_idx=300
197
+ downtown_windowShopping_00 data.start_idx=300 data.end_idx=400
198
+ downtown_windowShopping_00 data.start_idx=400 data.end_idx=500
199
+ downtown_windowShopping_00 data.start_idx=500 data.end_idx=600
200
+ downtown_windowShopping_00 data.start_idx=600 data.end_idx=700
201
+ downtown_windowShopping_00 data.start_idx=700 data.end_idx=800
202
+ downtown_windowShopping_00 data.start_idx=800 data.end_idx=900
203
+ downtown_windowShopping_00 data.start_idx=900 data.end_idx=1000
204
+ downtown_windowShopping_00 data.start_idx=1000 data.end_idx=1100
205
+ downtown_windowShopping_00 data.start_idx=1100 data.end_idx=1200
206
+ downtown_windowShopping_00 data.start_idx=1200 data.end_idx=1300
207
+ downtown_windowShopping_00 data.start_idx=1300 data.end_idx=1400
208
+ downtown_windowShopping_00 data.start_idx=1400 data.end_idx=1500
209
+ downtown_windowShopping_00 data.start_idx=1500 data.end_idx=1600
210
+ downtown_windowShopping_00 data.start_idx=1600 data.end_idx=1700
211
+ downtown_windowShopping_00 data.start_idx=1700 data.end_idx=1800
212
+ downtown_windowShopping_00 data.start_idx=1800 data.end_idx=1948
213
+ flat_guitar_01 data.start_idx=0 data.end_idx=100
214
+ flat_guitar_01 data.start_idx=100 data.end_idx=200
215
+ flat_guitar_01 data.start_idx=200 data.end_idx=300
216
+ flat_guitar_01 data.start_idx=300 data.end_idx=400
217
+ flat_guitar_01 data.start_idx=400 data.end_idx=500
218
+ flat_guitar_01 data.start_idx=500 data.end_idx=600
219
+ flat_guitar_01 data.start_idx=600 data.end_idx=748
220
+ flat_packBags_00 data.start_idx=0 data.end_idx=100
221
+ flat_packBags_00 data.start_idx=100 data.end_idx=200
222
+ flat_packBags_00 data.start_idx=200 data.end_idx=300
223
+ flat_packBags_00 data.start_idx=300 data.end_idx=400
224
+ flat_packBags_00 data.start_idx=400 data.end_idx=500
225
+ flat_packBags_00 data.start_idx=500 data.end_idx=600
226
+ flat_packBags_00 data.start_idx=600 data.end_idx=700
227
+ flat_packBags_00 data.start_idx=700 data.end_idx=800
228
+ flat_packBags_00 data.start_idx=800 data.end_idx=900
229
+ flat_packBags_00 data.start_idx=900 data.end_idx=1000
230
+ flat_packBags_00 data.start_idx=1000 data.end_idx=1100
231
+ flat_packBags_00 data.start_idx=1100 data.end_idx=1279
232
+ office_phoneCall_00 data.start_idx=0 data.end_idx=100
233
+ office_phoneCall_00 data.start_idx=100 data.end_idx=200
234
+ office_phoneCall_00 data.start_idx=200 data.end_idx=300
235
+ office_phoneCall_00 data.start_idx=300 data.end_idx=400
236
+ office_phoneCall_00 data.start_idx=400 data.end_idx=500
237
+ office_phoneCall_00 data.start_idx=500 data.end_idx=600
238
+ office_phoneCall_00 data.start_idx=600 data.end_idx=700
239
+ office_phoneCall_00 data.start_idx=700 data.end_idx=880
240
+ outdoors_fencing_01 data.start_idx=0 data.end_idx=100
241
+ outdoors_fencing_01 data.start_idx=100 data.end_idx=200
242
+ outdoors_fencing_01 data.start_idx=200 data.end_idx=300
243
+ outdoors_fencing_01 data.start_idx=300 data.end_idx=400
244
+ outdoors_fencing_01 data.start_idx=400 data.end_idx=500
245
+ outdoors_fencing_01 data.start_idx=500 data.end_idx=600
246
+ outdoors_fencing_01 data.start_idx=600 data.end_idx=700
247
+ outdoors_fencing_01 data.start_idx=700 data.end_idx=800
248
+ outdoors_fencing_01 data.start_idx=800 data.end_idx=942
slahmr/slahmr/job_specs/davis.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ parkour data=davis fps=24
2
+ lady-running data=davis fps=24
3
+ dance-twirl data=davis fps=24
4
+ lindy-hop data=davis fps=24
5
+ hike data=davis fps=24
6
+ judo data=davis fps=24
7
+ lucia data=davis fps=24
8
+ tennis data=davis fps=24
9
+ skate-park data=davis fps=24
10
+ boxing-fisheye data=davis fps=24
11
+ crossing data=davis fps=24
12
+ loading data=davis fps=24
13
+ bike-packing data=davis fps=24
14
+ dance-jump data=davis fps=24
15
+ hockey data=davis fps=24
16
+ india data=davis fps=24
17
+ kid-football data=davis fps=24
18
+ longboard data=davis fps=24
19
+ schoolgirls data=davis fps=24
20
+ snowboard data=davis fps=24
21
+ stunt data=davis fps=24
22
+ swing data=davis fps=24
23
+ dancing data=davis fps=24
24
+ kite-walk data=davis fps=24