Spaces:

MLAdaptiveIntelligence
/

PRIMA-demo

Configuration error

App Files Files Community

HF Space deploy committed about 3 hours ago

Commit

cdad419

0 Parent(s):

Deploy snapshot (LFS for demo images per .gitattributes)

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +5 -0
.github/workflows/check-headers.yml +36 -0
.github/workflows/codespell.yml +21 -0
.github/workflows/release-pypi.yml +48 -0
.gitignore +175 -0
README.md +252 -0
app.py +713 -0
chumpy/__init__.py +16 -0
chumpy/ch.py +66 -0
configs/sa_finetune_hrnet_w32.yaml +220 -0
configs_hydra/experiment/default.yaml +28 -0
configs_hydra/experiment/default_val.yaml +34 -0
configs_hydra/experiment/primaStage1.yaml +83 -0
configs_hydra/experiment/primaStage2.yaml +113 -0
configs_hydra/extras/default.yaml +8 -0
configs_hydra/hydra/default.yaml +26 -0
configs_hydra/launcher/local.yaml +13 -0
configs_hydra/launcher/slurm.yaml +22 -0
configs_hydra/paths/default.yaml +18 -0
configs_hydra/train.yaml +46 -0
configs_hydra/trainer/cpu.yaml +6 -0
configs_hydra/trainer/ddp.yaml +14 -0
configs_hydra/trainer/default.yaml +10 -0
configs_hydra/trainer/default_amr.yaml +9 -0
configs_hydra/trainer/gpu.yaml +6 -0
configs_hydra/trainer/mps.yaml +6 -0
demo.py +189 -0
demo.sh +12 -0
demo_data/000000015956_horse.png +3 -0
demo_data/000000315905_zebra.jpg +3 -0
demo_data/beagle.jpg +3 -0
demo_data/n02101388_1188.png +3 -0
demo_data/n02412080_12159.png +3 -0
demo_data/shepherd_hati.jpg +3 -0
demo_tta.py +399 -0
demo_tta.sh +15 -0
eval.py +103 -0
images/teaser.png +3 -0
packages.txt +4 -0
prima/__init__.py +25 -0
prima/configs/__init__.py +99 -0
prima/models/__init__.py +54 -0
prima/models/backbones/__init__.py +19 -0
prima/models/backbones/vit.py +375 -0
prima/models/bioclip_embedding.py +70 -0
prima/models/components/__init__.py +0 -0
prima/models/components/model_utils.py +160 -0
prima/models/components/pose_transformer.py +366 -0
prima/models/components/position_encoding.py +84 -0
prima/models/components/t_cond_mlp.py +204 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,5 @@

+# Hugging Face Hub stores these via Git LFS / Xet (plain PNG/JPG in git are rejected on push).
+demo_data/*.png filter=lfs diff=lfs merge=lfs -text
+demo_data/*.jpg filter=lfs diff=lfs merge=lfs -text
+demo_data/*.jpeg filter=lfs diff=lfs merge=lfs -text
+images/*.png filter=lfs diff=lfs merge=lfs -text

.github/workflows/check-headers.yml ADDED Viewed

	@@ -0,0 +1,36 @@

+---
+    name: Check File Headers
+    on:
+      push:
+        branches: [main]
+      pull_request:
+        branches: [main]
+    jobs:
+      check-headers:
+        name: Check Python file headers
+        runs-on: ubuntu-latest
+        permissions:
+          contents: read
+        steps:
+          - name: Checkout code
+            uses: actions/checkout@v3
+          - name: Set up Python
+            uses: actions/setup-python@v4
+            with:
+              python-version: "3.10"
+          - name: Check headers
+            run: |
+              python scripts/update_headers.py --check
+            continue-on-error: false
+          - name: Provide fix instructions
+            if: failure()
+            run: |
+              echo "::error::Some files are missing proper headers."
+              echo "To fix this, run: python scripts/update_headers.py"
+              echo "Then commit the changes."

.github/workflows/codespell.yml ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+    name: Codespell
+    on:
+      push:
+        branches: [main]
+      pull_request:
+        branches: [main]
+    jobs:
+      codespell:
+        name: Check for spelling errors
+        runs-on: ubuntu-latest
+        steps:
+          - name: Checkout
+            uses: actions/checkout@v3
+          - name: Codespell
+            uses: codespell-project/actions-codespell@v1
+            with:
+               ignore_words_list: prima-animal, mpjpe, uvd, xyz, hm36, cpn, dbb

.github/workflows/release-pypi.yml ADDED Viewed

	@@ -0,0 +1,48 @@

+name: Update pypi release
+on:
+  push:
+    tags:
+      - 'v*.*.*'
+  pull_request:
+    branches:
+      - main
+    types:
+      - labeled
+      - opened
+      - edited
+      - synchronize
+      - reopened
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Cache dependencies
+        id: pip-cache
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install wheel
+          # NOTE(stes) see https://github.com/pypa/twine/issues/1216#issuecomment-2629069669
+          pip install "packaging>=24.2"
+      - name: Checkout code
+        uses: actions/checkout@v3
+      - name: Build and publish to PyPI
+        if: ${{ github.event_name == 'push' }}
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TWINE_API_KEY }}
+        run: |
+          pip install build twine
+          python3 -m build
+          ls dist/
+          python3 -m twine upload --verbose dist/*

.gitignore ADDED Viewed

	@@ -0,0 +1,175 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Vscode
+.vscode/
+# Directory
+.gradio/
+demo_out/
+demo_out*/
+data/PRIMA*/
+data/backbone.pth
+logs/
+*.pth
+*.pkl
+datasets/

README.md ADDED Viewed

	@@ -0,0 +1,252 @@

+# PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+This is the official implementation of the approach described in the preprint:
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation \
+Xiaohang Yu, Ti Wang, Mackenzie Weygandt Mathis
+![PRIMA teaser](images/teaser.png)
+---
+## 🚀 TL;DR
+PRIMA creates a 3D quadruped mesh from a single 2D image. It leverages BioCLIP-based biological priors for robust cross-species shape understanding, then applies test-time adaptation with 2D reprojection and auxiliary keypoint guidance to refine SMAL pose and shape predictions.
+It can further be used to build Quadruped3D, a large-scale pseudo-3D dataset with diverse species and poses.
+PRIMA achieves state-of-the-art results on Animal3D, CtrlAni3D, Quadruped2D, and Animal Kingdom datasets.
+## Installation
+### Install from PyPI
+> Recommended: Python 3.10 and a CUDA-enabled PyTorch installation.
+```bash
+conda create -n prima python=3.10 -y
+conda activate prima
+# Install PyTorch matching your CUDA (example: CUDA 11.8)
+pip install --index-url https://download.pytorch.org/whl/cu118 \
+    "torch==2.2.1" "torchvision==0.17.1" "torchaudio==2.2.1"
+# Install chumpy and PyTorch3D
+python -m pip install --no-build-isolation \
+      "git+https://github.com/mattloper/chumpy.git"
+python -m pip install --no-build-isolation \
+      "git+https://github.com/facebookresearch/pytorch3d.git"
+# Install PRIMA from PyPI
+pip install prima-animal
+```
+`prima-animal` includes demo runtime dependencies used by `demo.py`, `demo_tta.py`, and `app.py` (including Detectron2 and DeepLabCut).
+### Clean install from this repository
+Use these when developing from a **git clone** (not the PyPI wheel). The shell scripts are **non-interactive** (pip uses `--no-input`; `GIT_TERMINAL_PROMPT=0` for git). Put Hugging Face credentials in your environment or git credential helper before pushing the Space.
+**Local (fresh venv, LFS assets, Hub demo weights, smoke test)** — requires **Python 3.10+**
+(Gradio 5.1+ / Space-provided Gradio 6.x and `app.py` type hints). On macOS without `python3.10` on your `PATH`, install
+`brew install python@3.10` and set `PRIMA_PYTHON=/opt/homebrew/bin/python3.10`.
+```bash
+chmod +x scripts/clean_install_local.sh scripts/clean_redeploy_hf_space.sh scripts/deploy_hf_space.sh
+PRIMA_PYTHON=/opt/homebrew/bin/python3.10 ./scripts/clean_install_local.sh
+```
+Options:
+- `PRIMA_VENV=.venv ./scripts/clean_install_local.sh --skip-data` — skip the large `setup_demo_data` download if `data/` is already populated.
+- `./scripts/clean_install_local.sh --wipe-data --force-data` — delete downloaded `data/` assets and redownload.
+- `./scripts/clean_install_local.sh --no-editable` — only `requirements.txt` (no `pip install -e .`); use if editable install fails and you will install the training stack via conda as in the PyPI section above. You still need **Python 3.10+** for Gradio 5.1+. The smoke test sets `PYTHONPATH` to the repo root so `import prima` works without an editable install.
+- **`requirements.txt` pins `deeplabcut==3.0.0rc14`** (SuperAnimal PyTorch API). On macOS, `clean_install_local.sh` installs a PyTables wheel first, then DLC 3.x. Full check: `./scripts/test_local_full.sh`.
+After `requirements.txt`, the script runs **`pip install --no-deps -e .`** so the `prima` package is registered without re-resolving `pyproject.toml` (which would pull **Detectron2** from git again). Install Detectron2 separately if needed: `pip install 'git+https://github.com/facebookresearch/detectron2.git'`.
+**Hugging Face Space (full redeploy from your working tree):**
+Requires [Git LFS / Xet](https://huggingface.co/docs/hub/xet/using-xet-storage#git) tooling (`brew install git-lfs git-xet`, `git xet install`, `git lfs install`). Then:
+```bash
+./scripts/clean_redeploy_hf_space.sh
+```
+This is equivalent to `./scripts/deploy_hf_space.sh` and force-pushes a fresh snapshot to the Space.
+---
+## Demo
+### Checkpoints and data
+The demo scripts auto-download their default Stage 1 PRIMA assets from Hugging
+Face when the checkpoint or matching Hydra config is missing. If you want to
+pre-download all necessary checkpoints and data ahead of time, run:
+```bash
+python scripts/setup_demo_data.py --hf-repo-id MLAdaptiveIntelligence/PRIMA
+```
+Approximate default prefetch volume from Hugging Face is ~5.5 GB total
+(`s1ckpt_inference.ckpt` ~3 GB + `amr_vitbb.pth` ~2.5 GB + SMAL files).
+Expected time is roughly:
+- 100 Mbps: ~7-10 minutes
+- 300 Mbps: ~2-4 minutes
+- 1 Gbps: ~1 minute
+Existing files are reused by default; pass `--force` only if you need to redownload them. If you also need the Stage 3 pretrained model, add `--include-stage3`.
+Expected files in that Hugging Face repo root:
+- `my_smpl_00781_4_all.pkl`
+- `my_smpl_data_00781_4_all.pkl`
+- `walking_toy_symmetric_pose_prior_with_cov_35parts.pkl`
+- `amr_vitbb.pth`
+- `config_s1_HYDRA.yaml`
+- `s1ckpt_inference.ckpt`
+Optional Stage 3 prefetch expects:
+- `config_s3_HYDRA.yaml`
+- `s3ckpt_inference.ckpt`
+### Demo (without TTA)
+Run animal detection + PRIMA 3D pose/shape inference:
+```bash
+bash demo.sh
+```
+Outputs are written to `demo_out/`. Edit `demo.sh` if you want to use a custom
+checkpoint path.
+---
+### Demo (with TTA)
+Run PRIMA inference with test-time adaptation:
+```bash
+bash demo_tta.sh
+```
+Outputs are written to `demo_out_tta/` (before/after TTA renders, keypoints, and
+optional meshes). Edit `demo_tta.sh` if you want to change the checkpoint, TTA
+learning rate, or number of iterations.
+---
+### Gradio demo
+We also provide a simple Gradio-based web demo for interactive testing in the
+browser:
+```bash
+python app.py \
+  --checkpoint data/PRIMAS1/checkpoints/s1ckpt_inference.ckpt \
+  --out_folder demo_out_tta_gradio/
+```
+This starts a local Gradio app (by default on http://127.0.0.1:7860), where
+you can upload images and visualize PRIMA predictions and adaptation results.
+The `s1ckpt_inference.ckpt` checkpoint is downloaded automatically if missing.
+`app.py` picks a **demo profile** automatically:
+| | **Local** (`python app.py`) | **Hugging Face Space** |
+|--|--|--|
+| PRIMA device | GPU if available, else CPU | CPU only |
+| Detectron2 | X-101-FPN | R50-FPN (lighter) |
+| Default TTA iterations | 30 | 0 (PRIMA-only by default) |
+| Save `.obj` meshes | on | off |
+| Preload checkpoint at startup | off | on |
+Override for testing: `PRIMA_DEMO_MODE=local` or `PRIMA_DEMO_MODE=space`.
+#### Hugging Face Space (maintainers)
+Demo images under `demo_data/` and `images/teaser.png` are tracked with **Git LFS**
+(see `.gitattributes`) so they can be pushed to a Hugging Face Space under the Hub’s
+LFS / **Xet** bridge. Install tooling once:
+```bash
+brew install git-lfs git-xet
+git xet install
+git lfs install
+```
+Then from a clean checkout with LFS files present, redeploy the Space (same as `clean_redeploy_hf_space.sh`):
+```bash
+./scripts/deploy_hf_space.sh
+# or
+./scripts/clean_redeploy_hf_space.sh
+```
+The script rsyncs the working tree (not `git archive`) so image files are materialized
+before `git add` turns them into LFS blobs.
+---
+## Training and Evaluation
+### Dataset Setup
+Download datasets from [Animal3D](https://xujiacong.github.io/Animal3D/), [CtrlAni3D](https://github.com/luoxue-star/AniMer?tab=readme-ov-file#training), Quadruped2D, and [Animal Kingdom](https://drive.google.com/file/d/1dk2a0qB0fbVZ4X6eAgP6VJVXj0rxVfsJ/view?usp=drive_link). For Quadruped2D, download the images from [SuperAnimal-Quadruped80K](https://zenodo.org/records/14016777) and our processed annotations from [here](https://drive.google.com/drive/folders/1eBNboxVwl_eGPoC93zxf-U3hmE6e2f-f?usp=sharing). Put all the datasets under `datasets/`.
+### Training
+Two-stage training script:
+```bash
+bash train.sh
+```
+Training outputs are written to `logs/train/runs/<exp_name>/`.
+### Evaluation
+```bash
+python eval.py \
+  --config data/PRIMAS1/.hydra/config.yaml \
+  --checkpoint data/PRIMAS1/checkpoints/s1ckpt_inference.ckpt
+```
+Common values for `--dataset` are controlled by:
+- `configs_hydra/experiment/default_val.yaml`
+---
+## Acknowledgements
+This release builds on several open-source projects, including:
+- [Detectron2](https://github.com/facebookresearch/detectron2)
+- [BioCLIP](https://github.com/Imageomics/BioCLIP)
+- [AniMer](https://github.com/luoxue-star/AniMer)
+- [DeepLabCut](https://github.com/DeepLabCut/DeepLabCut)
+- [SAM3DB](https://github.com/facebookresearch/sam-3d-body)
+---
+## Citation
+If you use this code in your research, please cite our PRIMA paper.
+```bibtex
+@misc{yu_prima,
+  title={PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation},
+  author={Xiaohang Yu and Ti Wang and Mackenzie Weygandt Mathis},
+}
+```
+---
+## Contact
+For issues, please open a GitHub issue in this repository.

app.py ADDED Viewed

	@@ -0,0 +1,713 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+"""Gradio demo for PRIMA + SuperAnimal + TTA.
+This script wraps the ``demo_tta.py`` pipeline into an interactive
+Gradio interface. The overall logic follows:
+1. Given an input image, run Detectron2 to detect animals.
+2. For each detected animal, run PRIMA for 3D pose/shape estimation.
+3. Run the fine-tuned DeepLabCut SuperAnimal model to obtain PRIMA 26-keypoint
+   2D predictions.
+4. Run test-time adaptation (TTA) with user-specified lr and iters.
+5. Render and save before/after TTA results and keypoint visualizations.
+"""
+import argparse
+import os
+import sys
+import tempfile
+import traceback
+from dataclasses import dataclass
+from functools import lru_cache
+from types import SimpleNamespace
+from typing import List, Optional, Tuple
+from pathlib import Path
+import cv2
+import gradio as gr
+import numpy as np
+import torch
+import torch.utils.data
+# Space demo on macOS (runs only when the platform is darwin AND SPACE_ID is set): limit BLAS threads (PyRender + PyTorch on main thread only).
+if sys.platform == "darwin" and os.environ.get("SPACE_ID"):
+    os.environ.setdefault("OMP_NUM_THREADS", "1")  # setdefault keeps an already-exported value; NOTE(review): torch is imported above, so this may be read too late — confirm
+    torch.set_num_threads(1)
+# Repo-local minimal ``chumpy`` shim (see ``chumpy/__init__.py``) so SMAL pickles load
+# without installing the full chumpy package in Space builds.
+_REPO_ROOT = Path(__file__).resolve().parent  # directory that contains this file
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))  # front of sys.path so repo-local packages are found first
+from prima.utils.weights import (
+    DEFAULT_HF_REPO_ID,
+    resolve_prima_checkpoint_path,
+)
+from prima.utils.detection import select_animal_boxes
+# Default checkpoint path following README instructions
+DEFAULT_CHECKPOINT = str(_REPO_ROOT / "data" / "PRIMAS1" / "checkpoints" / "s1ckpt_inference.ckpt")
+DEFAULT_HF_ASSET_REPO = DEFAULT_HF_REPO_ID
+# Output folder for rendered images/meshes and keypoints
+DEFAULT_OUT_FOLDER = "demo_out_tta_gradio"
+_D2_R50_CFG = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
+_D2_R50_URL = (
+    "https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/"
+    "faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl"
+)
+_D2_X101_CFG = "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
+_D2_X101_URL = (
+    "https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/"
+    "faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl"
+)
+# Gradio example row: (image_rel, tta_lr, tta_iters, det_thresh, kp_thresh, side_view, save_mesh)
+ExampleRow = Tuple[str, float, int, float, float, bool, bool]
+@dataclass(frozen=True)
+class DemoProfile:
+    """Runtime settings for either the full local app or the lightweight HF Space demo."""
+    mode: str
+    prima_device: str  # "auto" (CUDA if available) or "cpu"
+    detectron_config_yaml: str
+    detectron_weights_url: str
+    detectron_device: str  # "auto" or "cpu"
+    default_tta_iters: int
+    max_tta_iters: int
+    default_save_mesh: bool
+    default_side_view: bool
+    preload_assets: bool
+    example_rows: Tuple[ExampleRow, ...]
+    description: str
+    interface_title: str
+    def resolve_prima_device(self) -> torch.device:
+        if self.prima_device == "cpu":
+            return torch.device("cpu")
+        return torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    def resolve_detectron_device(self) -> str:
+        if self.detectron_device == "cpu":
+            return "cpu"
+        return "cuda" if torch.cuda.is_available() else "cpu"
+LOCAL_DEMO_PROFILE = DemoProfile(
+    mode="local",
+    prima_device="auto",
+    detectron_config_yaml=_D2_X101_CFG,
+    detectron_weights_url=_D2_X101_URL,
+    detectron_device="auto",
+    default_tta_iters=30,
+    max_tta_iters=100,
+    default_save_mesh=True,
+    default_side_view=False,
+    preload_assets=False,
+    example_rows=(
+        ("demo_data/000000015956_horse.png", 1e-6, 30, 0.7, 0.1, False, True),
+        ("demo_data/n02412080_12159.png", 1e-6, 30, 0.7, 0.1, False, True),
+        ("demo_data/000000315905_zebra.jpg", 1e-6, 30, 0.7, 0.1, False, True),
+        ("demo_data/beagle.jpg", 1e-6, 0, 0.7, 0.1, False, True),
+        ("demo_data/shepherd_hati.jpg", 1e-6, 0, 0.7, 0.1, False, True),
+    ),
+    description=(
+        "**Local demo** — full pipeline on your machine (GPU when available).\n\n"
+        "Detectron2 **X-101-FPN**, PRIMA mesh recovery, optional **DeepLabCut SuperAnimal + TTA**. "
+        "Set TTA iterations to **0** to skip adaptation. Outputs are saved under "
+        f"`{DEFAULT_OUT_FOLDER}`."
+    ),
+    interface_title=(
+        "PRIMA local demo (GPU/CPU) — detection, mesh recovery, optional TTA"
+    ),
+)
+SPACE_DEMO_PROFILE = DemoProfile(
+    mode="space",
+    prima_device="cpu",
+    detectron_config_yaml=_D2_R50_CFG,
+    detectron_weights_url=_D2_R50_URL,
+    detectron_device="cpu",
+    default_tta_iters=0,
+    max_tta_iters=30,
+    default_save_mesh=False,
+    default_side_view=False,
+    preload_assets=True,
+    example_rows=(
+        ("demo_data/beagle.jpg", 1e-6, 0, 0.7, 0.1, False, False),
+        ("demo_data/000000015956_horse.png", 1e-6, 0, 0.7, 0.1, False, False),
+        ("demo_data/000000315905_zebra.jpg", 1e-6, 0, 0.7, 0.1, False, False),
+    ),
+    description=(
+        "**Hugging Face Space (cpu-basic)** — lightweight demo: **CPU-only**, Detectron2 **R50-FPN**, "
+        "PRIMA inference. TTA is optional (0 by default; increases runtime). Mesh `.obj` export is off "
+        "by default to save time and disk."
+    ),
+    interface_title="PRIMA on Hugging Face — lightweight CPU demo",
+)
+def _is_truthy_env(var_name: str) -> bool:
+    return os.environ.get(var_name, "").strip().lower() in {"1", "true", "yes", "on"}
+def _running_on_space() -> bool:
+    return bool(os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID"))
+@lru_cache(maxsize=1)
+def get_demo_profile() -> DemoProfile:
+    """Select local vs Space profile. Override with ``PRIMA_DEMO_MODE=local|space``."""
+    override = os.environ.get("PRIMA_DEMO_MODE", "").strip().lower()
+    if override == "local":
+        return LOCAL_DEMO_PROFILE
+    if override == "space":
+        return SPACE_DEMO_PROFILE
+    return SPACE_DEMO_PROFILE if _running_on_space() else LOCAL_DEMO_PROFILE
+def _gradio_examples_for_interface(profile: DemoProfile) -> List[List]:
+    """Gradio prefetches example media at startup (paths must exist beside ``app.py``)."""
+    if _is_truthy_env("PRIMA_DISABLE_GRADIO_EXAMPLES"):
+        return []
+    rows: List[List] = []
+    for rel, *rest in profile.example_rows:
+        p = _REPO_ROOT / rel
+        if p.is_file():
+            rows.append([str(p), *rest])
+    return rows
+def _should_preload_assets(profile: DemoProfile) -> bool:
+    preload_env = os.environ.get("PRIMA_PRELOAD_ASSETS")
+    if preload_env is not None:
+        return _is_truthy_env("PRIMA_PRELOAD_ASSETS")
+    return profile.preload_assets
+def _deeplabcut_available() -> bool:
+    try:
+        from deeplabcut.pose_estimation_pytorch.apis import superanimal_analyze_images  # noqa: F401
+        return True
+    except Exception:
+        return False
+def _preload_assets_once(checkpoint_path: str) -> None:
+    print("[startup] Ensuring demo assets from Hugging Face Hub...")
+    resolve_prima_checkpoint_path(
+        checkpoint_path,
+        data_dir=_REPO_ROOT / "data",
+        auto_download=True,
+        hf_repo_id=os.environ.get("PRIMA_HF_REPO_ID", DEFAULT_HF_ASSET_REPO),
+    )
+    print("[startup] Asset preload complete.")
+def _load_prima_model(checkpoint_path: str = DEFAULT_CHECKPOINT):
+    """Load PRIMA model and renderer once for the Gradio app."""
+    from prima.models import load_prima
+    from prima.utils.renderer import Renderer, cam_crop_to_full
+    checkpoint_path = resolve_prima_checkpoint_path(
+        checkpoint_path,
+        data_dir=_REPO_ROOT / "data",
+        auto_download=True,
+        hf_repo_id=os.environ.get("PRIMA_HF_REPO_ID", DEFAULT_HF_ASSET_REPO),
+    )
+    checkpoint = Path(checkpoint_path)
+    cfg_path = checkpoint.parent.parent / ".hydra" / "config.yaml"
+    if not checkpoint.exists():
+        raise FileNotFoundError(
+            f"Missing checkpoint: {checkpoint}. Download demo checkpoints/data as described in README."
+        )
+    if not cfg_path.exists():
+        raise FileNotFoundError(
+            f"Missing model config: {cfg_path}. Ensure the full checkpoint folder layout from README is present."
+        )
+    profile = get_demo_profile()
+    model, model_cfg = load_prima(checkpoint_path)
+    device = profile.resolve_prima_device()
+    model = model.to(device)
+    model.eval()
+    renderer = Renderer(model_cfg, faces=model.smal.faces)
+    return model, model_cfg, renderer, cam_crop_to_full, device
+def _build_detector(profile: Optional[DemoProfile] = None):
+    """Build Detectron2 animal detector (profile selects X-101+GPU locally vs R50+CPU on Space)."""
+    try:
+        import detectron2.config
+        import detectron2.engine
+        from detectron2 import model_zoo
+    except Exception as e:
+        print(f"[warn] Detectron2 unavailable ({type(e).__name__}: {e}); using full-image fallback bbox.")
+        return None
+    if profile is None:
+        profile = get_demo_profile()
+    config_yaml = profile.detectron_config_yaml
+    weights = profile.detectron_weights_url
+    device_str = profile.resolve_detectron_device()
+    print(f"[detectron2] mode={profile.mode} config={config_yaml} device={device_str}")
+    cfg = detectron2.config.get_cfg()
+    cfg.merge_from_file(model_zoo.get_config_file(config_yaml))
+    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
+    cfg.MODEL.WEIGHTS = weights
+    cfg.MODEL.DEVICE = device_str
+    detector = detectron2.engine.DefaultPredictor(cfg)
+    return detector
+def _load_model_and_detector_for_demo(checkpoint_path: str, profile: DemoProfile):
+    """Load PRIMA and Detectron2 once for the Gradio session (main thread only)."""
+    model, model_cfg, renderer, cam_crop_to_full_fn, device = _load_prima_model(checkpoint_path)
+    detector = _build_detector(profile)
+    return model, model_cfg, renderer, cam_crop_to_full_fn, device, detector
+def _detect_animal_boxes(
+    detector,
+    img_bgr: np.ndarray,
+    det_thresh: float,
+) -> Optional[np.ndarray]:
+    """Return Nx4 XYXY boxes or None if no animal detections."""
+    if detector is None:
+        h, w = img_bgr.shape[:2]
+        return np.array([[0.0, 0.0, float(max(1, w - 1)), float(max(1, h - 1))]], dtype=np.float32)
+    det_out = detector(img_bgr)
+    det_instances = det_out["instances"]
+    boxes, suppressed = select_animal_boxes(det_instances, score_threshold=float(det_thresh))
+    if suppressed > 0:
+        print(f"[INFO] Suppressed {suppressed} duplicate animal detection(s)")
+    if len(boxes) == 0:
+        return None
+    return boxes
+# SuperAnimal defaults (same as in demo_tta parser); passed to ``run_superanimal_on_patch`` as its args object.
+SUPER_ANIMAL_ARGS = SimpleNamespace(
+    superanimal_name="superanimal_quadruped",
+    superanimal_model_name="hrnet_w32",
+    superanimal_detector_name="fasterrcnn_resnet50_fpn_v2",
+    superanimal_max_individuals=1,
+    saved_2d_model_path="",  # empty until the first run with TTA iterations > 0, then set in place from resolve_sa_weights_path("")
+    pytorch_config_2d_path=str(_REPO_ROOT / "configs" / "sa_finetune_hrnet_w32.yaml"),
+)
+def _collect_animal_results(
+    model,
+    model_cfg,
+    renderer,
+    cam_crop_to_full_fn,
+    device,
+    detector,
+    out_folder: str,
+    img_rgb: np.ndarray,
+    tta_lr: float,
+    tta_num_iters: int,
+    det_thresh: float,
+    kp_conf_thresh: float,
+    side_view: bool,
+    save_mesh: bool,
+    boxes: Optional[np.ndarray] = None,
+) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], str | None, str | None]:
+    """Run detection + PRIMA + SuperAnimal + TTA on a single RGB image.
+    Returns:
+        before_imgs: list of HxWx3 RGB images (before TTA) for all animals
+        after_imgs: list of HxWx3 RGB images (after TTA) for all animals
+        kpt_imgs: list of HxWx3 RGB keypoint visualizations
+        first_before_mesh: path to first animal's before-TTA mesh (.obj) or None
+        first_after_mesh: path to first animal's after-TTA mesh (.obj) or None
+    """
+    from prima.utils import recursive_to
+    from prima.datasets.vitdet_dataset import ViTDetDataset
+    from demo_tta import (
+        denorm_patch_to_rgb,
+        resolve_sa_weights_path,
+        run_superanimal_on_patch,
+        save_keypoint_vis,
+        tta_optimize,
+    )
+    if int(tta_num_iters) > 0 and not SUPER_ANIMAL_ARGS.saved_2d_model_path:
+        SUPER_ANIMAL_ARGS.saved_2d_model_path = resolve_sa_weights_path("")
+    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
+    if boxes is None:
+        boxes = _detect_animal_boxes(detector, img_bgr, det_thresh)
+    if boxes is None:
+        return [], [], [], None, None
+    dataset = ViTDetDataset(model_cfg, img_bgr, boxes)
+    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
+    before_imgs: List[np.ndarray] = []
+    after_imgs: List[np.ndarray] = []
+    kpt_imgs: List[np.ndarray] = []
+    before_mesh_paths: List[str] = []
+    after_mesh_paths: List[str] = []
+    img_token = next(tempfile._get_candidate_names())
+    for batch in dataloader:
+        batch = recursive_to(batch, device)
+        with torch.no_grad():
+            out_before = model(batch)
+        animal_id = int(batch["animalid"][0])
+        # Save/render before TTA
+        img_fn = f"{img_token}"
+        from demo_tta import render_and_save  # imported lazily to avoid circular issues
+        render_and_save(
+            renderer,
+            cam_crop_to_full_fn,
+            out_before,
+            batch,
+            img_fn,
+            animal_id,
+            out_folder,
+            suffix="before_tta",
+            side_view=side_view,
+            save_mesh=save_mesh,
+        )
+        before_png_path = os.path.join(out_folder, f"{img_fn}_{animal_id}_before_tta.png")
+        if os.path.exists(before_png_path):
+            before_bgr = cv2.imread(before_png_path)
+            if before_bgr is not None:
+                before_imgs.append(cv2.cvtColor(before_bgr, cv2.COLOR_BGR2RGB))
+        if save_mesh:
+            before_obj_path = os.path.join(out_folder, f"{img_fn}_{animal_id}_before_tta.obj")
+            if os.path.exists(before_obj_path):
+                before_mesh_paths.append(before_obj_path)
+        if int(tta_num_iters) <= 0:
+            render_and_save(
+                renderer,
+                cam_crop_to_full_fn,
+                out_before,
+                batch,
+                img_fn,
+                animal_id,
+                out_folder,
+                suffix="after_tta",
+                side_view=side_view,
+                save_mesh=save_mesh,
+            )
+            after_png_path = os.path.join(out_folder, f"{img_fn}_{animal_id}_after_tta.png")
+            if os.path.exists(after_png_path):
+                after_bgr = cv2.imread(after_png_path)
+                if after_bgr is not None:
+                    after_imgs.append(cv2.cvtColor(after_bgr, cv2.COLOR_BGR2RGB))
+            if save_mesh:
+                after_obj_path = os.path.join(out_folder, f"{img_fn}_{animal_id}_after_tta.obj")
+                if os.path.exists(after_obj_path):
+                    after_mesh_paths.append(after_obj_path)
+            continue
+        # Prepare patch for SuperAnimal
+        patch_rgb = denorm_patch_to_rgb(batch["img"][0])
+        with tempfile.TemporaryDirectory(prefix=f"dlc_{img_fn}_{animal_id}_") as tmp_dir:
+            bodyparts_xyc = run_superanimal_on_patch(patch_rgb, SUPER_ANIMAL_ARGS, tmp_dir)
+        if bodyparts_xyc is None:
+            # No keypoints => skip TTA for this animal
+            continue
+        kpts_xyc = bodyparts_xyc
+        kpts_xyc[kpts_xyc[:, 2] < float(kp_conf_thresh), 2] = 0.0
+        # Save keypoint visualization and npy
+        kpt_png_path = os.path.join(out_folder, f"{img_fn}_{animal_id}_prima26_kpts.png")
+        save_keypoint_vis(patch_rgb, kpts_xyc, kpt_png_path)
+        npy_path = os.path.join(out_folder, f"{img_fn}_{animal_id}_prima26_kpts.npy")
+        np.save(npy_path, kpts_xyc)
+        if os.path.exists(kpt_png_path):
+            kpt_bgr = cv2.imread(kpt_png_path)
+            if kpt_bgr is not None:
+                kpt_imgs.append(cv2.cvtColor(kpt_bgr, cv2.COLOR_BGR2RGB))
+        # Normalize keypoints to [-0.5, 0.5] as in demo_tta
+        patch_h, patch_w = patch_rgb.shape[:2]
+        kpts_norm = kpts_xyc.copy()
+        kpts_norm[:, 0] = kpts_norm[:, 0] / float(patch_w) - 0.5
+        kpts_norm[:, 1] = kpts_norm[:, 1] / float(patch_h) - 0.5
+        gt_kpts_norm = torch.from_numpy(kpts_norm[None]).to(device=device, dtype=batch["img"].dtype)
+        # Run TTA
+        out_after = tta_optimize(
+            model,
+            batch,
+            gt_kpts_norm,
+            num_iters=int(tta_num_iters),
+            lr=float(tta_lr),
+        )
+        render_and_save(
+            renderer,
+            cam_crop_to_full_fn,
+            out_after,
+            batch,
+            img_fn,
+            animal_id,
+            out_folder,
+            suffix="after_tta",
+            side_view=side_view,
+            save_mesh=save_mesh,
+        )
+        after_png_path = os.path.join(out_folder, f"{img_fn}_{animal_id}_after_tta.png")
+        if os.path.exists(after_png_path):
+            after_bgr = cv2.imread(after_png_path)
+            if after_bgr is not None:
+                after_imgs.append(cv2.cvtColor(after_bgr, cv2.COLOR_BGR2RGB))
+        if save_mesh:
+            after_obj_path = os.path.join(out_folder, f"{img_fn}_{animal_id}_after_tta.obj")
+            if os.path.exists(after_obj_path):
+                after_mesh_paths.append(after_obj_path)
+    first_before_mesh = before_mesh_paths[0] if before_mesh_paths else None
+    first_after_mesh = after_mesh_paths[0] if after_mesh_paths else None
+    return before_imgs, after_imgs, kpt_imgs, first_before_mesh, first_after_mesh
+def build_demo(checkpoint_path: str = DEFAULT_CHECKPOINT, out_folder: str = DEFAULT_OUT_FOLDER) -> gr.Interface:
+    """Build the Gradio ``Interface`` for the PRIMA demo.
+    The model and detector are not loaded here: they are loaded on the first
+    request inside ``gradio_inference`` and kept in ``runtime_cache`` so later
+    requests reuse them.
+    Args:
+        checkpoint_path: checkpoint passed to ``_load_model_and_detector_for_demo``.
+        out_folder: directory created here if missing and passed on to
+            ``_collect_animal_results``.
+    Returns:
+        The configured ``gr.Interface`` with its queue set up.
+    """
+    profile = get_demo_profile()
+    print(
+        f"[demo] profile={profile.mode} prima={profile.resolve_prima_device()} "
+        f"detectron={profile.detectron_config_yaml} d2_device={profile.resolve_detectron_device()}"
+    )
+    os.makedirs(out_folder, exist_ok=True)
+    # Filled on the first request; a ``None`` "model" entry means nothing has been loaded yet.
+    runtime_cache = {
+        "model": None,
+        "model_cfg": None,
+        "renderer": None,
+        "cam_crop_to_full_fn": None,
+        "device": None,
+        "detector": None,
+    }
+    def gradio_inference(
+        image: np.ndarray,
+        tta_lr: float,
+        tta_num_iters: int,
+        det_thresh: float,
+        kp_conf_thresh: float,
+        side_view: bool,
+        save_mesh: bool,
+    ):
+        """Wrapper for Gradio. ``image`` is an RGB numpy array.
+        Yields intermediate status so long first-run (Hub downloads + model load)
+        and long inference do not hit silent client/proxy WebSocket timeouts.
+        Every yield is a 4-tuple matching the interface outputs:
+        (before-TTA image, after-TTA image, keypoint image, status text).
+        Failures are reported through the status text (with a traceback) rather
+        than raised. Only the first animal's images are returned in the final yield.
+        """
+        if image is None:
+            yield None, None, None, "No image provided."
+            return
+        # TTA is only attempted when DeepLabCut is available; otherwise stop with a readable message.
+        if int(tta_num_iters) > 0 and not _deeplabcut_available():
+            yield (
+                None,
+                None,
+                None,
+                "DeepLabCut is not installed. Set **TTA iterations** to **0** for PRIMA-only inference, "
+                "or install `deeplabcut` (see README / requirements.txt).",
+            )
+            return
+        # Coerce non-uint8 input to uint8 by clipping to [0, 255].
+        if image.dtype != np.uint8:
+            img_rgb = np.clip(image, 0, 255).astype(np.uint8)
+        else:
+            img_rgb = image
+        yield None, None, None, "Queued; preparing run…"
+        # Lazy one-time load: the cache is only written after a successful load,
+        # so a failed load is retried on the next request.
+        if runtime_cache["model"] is None:
+            yield (
+                None,
+                None,
+                None,
+                "First run: downloading demo assets from Hugging Face (large checkpoint) "
+                "and loading the model. This can take many minutes.",
+            )
+            try:
+                model, model_cfg, renderer, cam_crop_to_full_fn, device, detector = _load_model_and_detector_for_demo(
+                    checkpoint_path, profile
+                )
+            except Exception:
+                yield None, None, None, f"Model initialization failed:\n{traceback.format_exc()}"
+                return
+            runtime_cache["model"] = model
+            runtime_cache["model_cfg"] = model_cfg
+            runtime_cache["renderer"] = renderer
+            runtime_cache["cam_crop_to_full_fn"] = cam_crop_to_full_fn
+            runtime_cache["device"] = device
+            runtime_cache["detector"] = detector
+            yield None, None, None, "Model loaded."
+        try:
+            yield None, None, None, "Running animal detection…"
+            # Detection runs here so the status can report the box count; the boxes
+            # are then handed to ``_collect_animal_results`` via ``boxes=``.
+            img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
+            boxes = _detect_animal_boxes(runtime_cache["detector"], img_bgr, det_thresh)
+            if boxes is None:
+                yield (
+                    None,
+                    None,
+                    None,
+                    "No animal detected. Try lowering the detection threshold or another image.",
+                )
+                return
+            yield (
+                None,
+                None,
+                None,
+                f"Detected {len(boxes)} animal region(s). Running PRIMA (+ SuperAnimal/TTA if enabled)…",
+            )
+            # ``mesh_before`` / ``mesh_after`` are returned but not used by this wrapper.
+            before_imgs, after_imgs, kpt_imgs, mesh_before, mesh_after = _collect_animal_results(
+                runtime_cache["model"],
+                runtime_cache["model_cfg"],
+                runtime_cache["renderer"],
+                runtime_cache["cam_crop_to_full_fn"],
+                runtime_cache["device"],
+                runtime_cache["detector"],
+                out_folder,
+                img_rgb,
+                tta_lr=tta_lr,
+                tta_num_iters=tta_num_iters,
+                det_thresh=det_thresh,
+                kp_conf_thresh=kp_conf_thresh,
+                side_view=side_view,
+                save_mesh=save_mesh,
+                boxes=boxes,
+            )
+        except Exception:
+            yield None, None, None, f"Inference failed:\n{traceback.format_exc()}"
+            return
+        # Only the first entry of each result list is shown in the UI.
+        first_before = before_imgs[0] if before_imgs else None
+        first_after = after_imgs[0] if after_imgs else None
+        first_kpts = kpt_imgs[0] if kpt_imgs else None
+        if first_before is None and first_after is None:
+            yield (
+                None,
+                None,
+                None,
+                "No output generated. Try an image with a clearly visible quadruped.",
+            )
+            return
+        yield first_before, first_after, first_kpts, "OK"
+    _gradio_examples = _gradio_examples_for_interface(profile)
+    # Input order must match the positional parameters of ``gradio_inference``;
+    # output order must match the 4-tuples it yields.
+    _iface_kw = dict(
+        fn=gradio_inference,
+        analytics_enabled=False,
+        cache_examples=False,
+        inputs=[
+            gr.Image(
+                label="Input image",
+                type="numpy",
+                sources=["upload", "clipboard"],
+            ),
+            gr.Slider(
+                label="TTA learning rate",
+                minimum=1e-7,
+                maximum=1e-4,
+                value=1e-6,
+                step=1e-7,
+            ),
+            gr.Slider(
+                label="TTA iterations",
+                minimum=0,
+                maximum=profile.max_tta_iters,
+                value=profile.default_tta_iters,
+                step=1,
+                info="Set to 0 to disable TTA and reuse the initial PRIMA prediction.",
+            ),
+            gr.Slider(
+                label="Detection threshold",
+                minimum=0.3,
+                maximum=0.9,
+                value=0.7,
+                step=0.05,
+            ),
+            gr.Slider(
+                label="Keypoint confidence threshold",
+                minimum=0.0,
+                maximum=1.0,
+                value=0.1,
+                step=0.05,
+            ),
+            gr.Checkbox(label="Render side view", value=profile.default_side_view),
+            gr.Checkbox(label="Save meshes (.obj)", value=profile.default_save_mesh),
+        ],
+        outputs=[
+            gr.Image(label="Before TTA"),
+            gr.Image(label="After TTA"),
+            gr.Image(label="PRIMA 26 keypoints"),
+            gr.Textbox(label="Status / Traceback", lines=12),
+        ],
+        title=profile.interface_title,
+        description=profile.description,
+    )
+    # The ``examples`` key is only passed when the profile provides a non-empty value.
+    if _gradio_examples:
+        _iface_kw["examples"] = _gradio_examples
+    demo = gr.Interface(**_iface_kw)
+    # Queue configuration: ``max_size=8`` and a default concurrency limit of 1.
+    demo.queue(max_size=8, default_concurrency_limit=1)
+    return demo
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Gradio demo for PRIMA + SuperAnimal + TTA")
+    parser.add_argument(
+        "--checkpoint",
+        type=str,
+        default=DEFAULT_CHECKPOINT,
+        help="Path to the pretrained PRIMA checkpoint",
+    )
+    parser.add_argument(
+        "--out_folder",
+        type=str,
+        default=DEFAULT_OUT_FOLDER,
+        help="Folder used to save rendered outputs and meshes",
+    )
+    return parser.parse_args()
+if __name__ == "__main__":
+    args = parse_args()
+    profile = get_demo_profile()
+    if _should_preload_assets(profile):
+        _preload_assets_once(args.checkpoint)
+    demo = build_demo(checkpoint_path=args.checkpoint, out_folder=args.out_folder)
+    demo.launch(inbrowser=False)

chumpy/__init__.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from __future__ import annotations
+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+"""Minimal ``chumpy`` compatibility for unpickling legacy SMAL model configs."""
+from .ch import Ch, ChArray, materialize
+__all__ = ["Ch", "ChArray", "materialize"]

chumpy/ch.py ADDED Viewed

	@@ -0,0 +1,66 @@

+from __future__ import annotations
+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+"""``chumpy.ch`` namespace expected by legacy SMAL pickles."""
+import numpy as np
+class Ch:
+    """Minimal stand-in for ``chumpy.ch.Ch`` (unpickling only)."""
+    def __init__(self, *args, **kwargs):
+        self._data = None
+        if args:
+            self._data = np.asarray(args[0])
+    def _resolve(self) -> np.ndarray:
+        # Real chumpy Ch instances store the underlying ndarray on attribute ``x``;
+        # legacy pickles unpickle by restoring ``__dict__`` without calling ``__init__``,
+        # so try common attribute names before falling back to ``_data``.
+        for attr in ("x", "_x", "_data"):
+            val = self.__dict__.get(attr)
+            if val is not None:
+                return np.asarray(val)
+        if self._data is not None:
+            return np.asarray(self._data)
+        return np.zeros((), dtype=np.float32)
+    @property
+    def r(self) -> np.ndarray:
+        return self._resolve()
+    def __array__(self, dtype=None):
+        arr = self.r()
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
+class ChArray(np.ndarray):
+    """Minimal stand-in for ``chumpy.ch.ChArray``.
+    A plain ``np.ndarray`` subclass that adds no attributes or methods.
+    NOTE(review): presumably present only so that pickles referencing
+    ``chumpy.ch.ChArray`` can resolve the name; confirm against the SMAL loader.
+    """
+def materialize(value, dtype=np.float32) -> np.ndarray:
+    """Recursively unwrap ``Ch`` / object arrays from legacy SMAL pickles."""
+    if isinstance(value, Ch):
+        return np.asarray(value.r(), dtype=dtype)
+    if isinstance(value, np.ndarray):
+        if value.dtype == object:
+            flat = [materialize(x, dtype=dtype) for x in value.ravel()]
+            return np.stack(flat).reshape(value.shape)
+        return np.asarray(value, dtype=dtype)
+    if isinstance(value, (list, tuple)):
+        return np.asarray([materialize(x, dtype=dtype) for x in value], dtype=dtype)
+    return np.asarray(value, dtype=dtype)
+__all__ = ["Ch", "ChArray", "materialize"]

configs/sa_finetune_hrnet_w32.yaml ADDED Viewed

	@@ -0,0 +1,220 @@

+# DeepLabCut pytorch_config for the PRIMA TTA 2D pose model:
+# SuperAnimal-Quadruped HRNet-w32 backbone fine-tuned on Animal3D, with
+# the heatmap head re-trained for the 26-joint Animal3D / PRIMA layout.
+#
+# Used by demo_tta.py via DLC's `superanimal_analyze_images(...,
+# customized_model_config=<this yaml>, customized_pose_checkpoint=<your
+# fine-tuned .pt>)`. Only the pose model is fine-tuned; the bounding-box
+# detector (Faster R-CNN) is the stock SuperAnimal-Quadruped one
+# resolved by DLC at runtime.
+data:
+  bbox_margin: 20
+  colormode: RGB
+  inference:
+    normalize_images: true
+    top_down_crop:
+      width: 256
+      height: 256
+    auto_padding:
+      pad_width_divisor: 32
+      pad_height_divisor: 32
+  train:
+    affine:
+      p: 0.5
+      rotation: 30
+      scaling:
+      - 1.0
+      - 1.0
+      translation: 0
+    gaussian_noise: 12.75
+    motion_blur: true
+    normalize_images: true
+    top_down_crop:
+      width: 256
+      height: 256
+    auto_padding:
+      pad_width_divisor: 32
+      pad_height_divisor: 32
+detector:
+  data:
+    colormode: RGB
+    inference:
+      normalize_images: true
+    train:
+      affine:
+        p: 0.5
+        rotation: 30
+        scaling:
+        - 1.0
+        - 1.0
+        translation: 40
+      collate:
+        type: ResizeFromDataSizeCollate
+        min_scale: 0.4
+        max_scale: 1.0
+        min_short_side: 128
+        max_short_side: 1152
+        multiple_of: 32
+        to_square: false
+      hflip: true
+      normalize_images: true
+  device: auto
+  model:
+    type: FasterRCNN
+    freeze_bn_stats: true
+    freeze_bn_weights: false
+    variant: fasterrcnn_resnet50_fpn_v2
+  runner:
+    type: DetectorTrainingRunner
+    key_metric: test.mAP@50:95
+    key_metric_asc: true
+    eval_interval: 10
+    optimizer:
+      type: AdamW
+      params:
+        lr: 0.0001
+    scheduler:
+      type: LRListScheduler
+      params:
+        milestones:
+        - 160
+        lr_list:
+        - - 1e-05
+    snapshots:
+      max_snapshots: 5
+      save_epochs: 25
+      save_optimizer_state: false
+  train_settings:
+    batch_size: 1
+    dataloader_workers: 0
+    dataloader_pin_memory: false
+    display_iters: 500
+    epochs: 250
+device: auto
+inference:
+  multithreading:
+    enabled: true
+    queue_length: 4
+    timeout: 30.0
+  compile:
+    enabled: false
+    backend: inductor
+  autocast:
+    enabled: false
+metadata:
+  project_path: ""
+  pose_config_path: ""
+  bodyparts:
+  - left_eye
+  - right_eye
+  - chin
+  - left_front_paw
+  - right_front_paw
+  - left_back_paw
+  - right_back_paw
+  - tail_base
+  - left_front_thigh
+  - right_front_thigh
+  - left_back_thigh
+  - right_back_thigh
+  - left_shoulder
+  - right_shoulder
+  - left_front_knee
+  - right_front_knee
+  - left_back_knee
+  - right_back_knee
+  - neck_base
+  - tail_mid
+  - left_ear_base
+  - right_ear_base
+  - left_mouth_corner
+  - right_mouth_corner
+  - nose
+  - tail_tip_first
+  unique_bodyparts: []
+  individuals:
+  - individual000
+  with_identity: false
+method: td
+model:
+  backbone:
+    type: HRNet
+    model_name: hrnet_w32
+    freeze_bn_stats: true
+    freeze_bn_weights: false
+    interpolate_branches: false
+    increased_channel_count: false
+  backbone_output_channels: 32
+  heads:
+    bodypart:
+      type: HeatmapHead
+      weight_init: normal
+      predictor:
+        type: HeatmapPredictor
+        apply_sigmoid: false
+        clip_scores: true
+        location_refinement: true
+        locref_std: 7.2801
+      target_generator:
+        type: HeatmapGaussianGenerator
+        num_heatmaps: 26
+        pos_dist_thresh: 17
+        heatmap_mode: KEYPOINT
+        gradient_masking: true
+        background_weight: 0.0
+        generate_locref: true
+        locref_std: 7.2801
+      criterion:
+        heatmap:
+          type: WeightedMSECriterion
+          weight: 1.0
+        locref:
+          type: WeightedHuberCriterion
+          weight: 0.05
+      heatmap_config:
+        channels:
+        - 32
+        kernel_size: []
+        strides: []
+        final_conv:
+          out_channels: 26
+          kernel_size: 1
+      locref_config:
+        channels:
+        - 32
+        kernel_size: []
+        strides: []
+        final_conv:
+          out_channels: 52
+          kernel_size: 1
+net_type: hrnet_w32
+runner:
+  type: PoseTrainingRunner
+  gpus:
+  key_metric: test.mAP
+  key_metric_asc: true
+  eval_interval: 10
+  optimizer:
+    type: AdamW
+    params:
+      lr: 0.0001
+  scheduler:
+    type: LRListScheduler
+    params:
+      lr_list:
+      - - 1e-05
+      - - 1e-06
+      milestones:
+      - 160
+      - 190
+  snapshots:
+    max_snapshots: 5
+    save_epochs: 10
+    save_optimizer_state: false
+train_settings:
+  batch_size: 64
+  dataloader_workers: 8
+  dataloader_pin_memory: false
+  display_iters: 500
+  epochs: 200
+  seed: 42

configs_hydra/experiment/default.yaml ADDED Viewed

	@@ -0,0 +1,28 @@

+# @package _global_
+SMAL:
+  DATA_DIR: data/smal
+  MODEL_PATH: data/smal/my_smpl_00781_4_all.pkl
+  SHAPE_PRIOR_PATH: data/smal/my_smpl_data_00781_4_all.pkl
+  POSE_PRIOR_PATH: data/smal/walking_toy_symmetric_pose_prior_with_cov_35parts.pkl
+  NUM_JOINTS: 34
+EXTRA:
+  FOCAL_LENGTH: 1000
+  NUM_LOG_IMAGES: 4
+  NUM_LOG_SAMPLES_PER_IMAGE: 4
+  PELVIS_IND: 0
+DATASETS:
+  CONFIG:
+    SCALE_FACTOR: 0.3
+    ROT_FACTOR: 30
+    TRANS_FACTOR: 0.02
+    COLOR_SCALE: 0.2
+    ROT_AUG_RATE: 0.6
+    TRANS_AUG_RATE: 0.5
+    DO_FLIP: False
+    FLIP_AUG_RATE: 0.0
+    EXTREME_CROP_AUG_RATE: 0.0
+    EXTREME_CROP_AUG_LEVEL: 1

configs_hydra/experiment/default_val.yaml ADDED Viewed

	@@ -0,0 +1,34 @@

+# @package _global_
+DATASETS:
+  ANIMAL3D:
+    ROOT_IMAGE: ./datasets/animal3d/
+    JSON_FILE:
+      TEST: ./datasets/animal3d/test.json
+  CONTROL_ANIMAL3D:
+    ROOT_IMAGE: ./datasets/control_animal3dlatest/
+    JSON_FILE:
+      TEST: ./datasets/control_animal3dlatest/test.json
+  QUADRUPED2D:
+    ROOT_IMAGE: ./datasets/quadruped2d/
+    JSON_FILE:
+      TEST: ./datasets/quadruped2d/test.json
+  ANIMAL_KINGDOM:
+    ROOT_IMAGE: ./datasets/Animal_Kingdom_test/
+    JSON_FILE:
+      TEST: ./datasets/Animal_Kingdom_test/test.json
+  CONFIG:
+    SCALE_FACTOR: 0.0
+    ROT_FACTOR: 0
+    TRANS_FACTOR: 0.0
+    COLOR_SCALE: 0.0
+    ROT_AUG_RATE: 0.0
+    TRANS_AUG_RATE: 0.0
+    DO_FLIP: False
+    FLIP_AUG_RATE: 0.0
+    EXTREME_CROP_AUG_RATE: 0.0
+    EXTREME_CROP_AUG_LEVEL: 1
+METRIC:
+  PCK_THRESHOLD: [0.10, 0.15]

configs_hydra/experiment/primaStage1.yaml ADDED Viewed

	@@ -0,0 +1,83 @@

+# @package _global_
+defaults:
+  - default.yaml
+GENERAL:
+  TOTAL_STEPS: 63_000
+  LOG_STEPS: 63
+  VAL_STEPS: 63
+  VAL_EPOCHS: 1
+  CHECKPOINT_EPOCHS: 1
+  CHECKPOINT_SAVE_TOP_K: 2
+  NUM_WORKERS: 8
+  PREFETCH_FACTOR: 2
+LOSS_WEIGHTS:
+  KEYPOINTS_3D: 0.05
+  KEYPOINTS_2D: 0.01
+  INTERMEDIATE_KP2D: 0.001
+  INTERMEDIATE_KP3D: 0.001
+  GLOBAL_ORIENT: 0.005
+  POSE: 0.001
+  BETAS: 0.0005
+  TRANSL: 0.0005
+  ADVERSARIAL: 0.0005
+  SUPCON: 0.0005
+TRAIN:
+  LR: 3.75e-6
+  WEIGHT_DECAY: 1e-4
+  BATCH_SIZE: 48
+  LOSS_REDUCTION: mean
+  NUM_TRAIN_SAMPLES: 2
+  NUM_TEST_SAMPLES: 64
+  POSE_2D_NOISE_RATIO: 0.01
+  SMPL_PARAM_NOISE_RATIO: 0.005
+MODEL:
+  IMAGE_SIZE: 256
+  IMAGE_MEAN: [0.485, 0.456, 0.406]
+  IMAGE_STD: [0.229, 0.224, 0.225]
+  BACKBONE:
+    TYPE: vith
+    PRETRAINED_WEIGHTS: ./data/amr_vitbb.pth
+    FREEZE: False
+  # Enable BioClip embedding
+  USE_BIOCLIP_EMBEDDING: True
+  BIOCLIP_EMBEDDING:
+    EMBED_DIM: 1280  # Match ViT backbone output dimension for token-wise concatenation
+    TYPE: bioclip1
+  # Enable 2D keypoint embedding for initialization; NewBioGuidedSMALPoseDecoder updates it dynamically
+  USE_KEYPOINT_EMBEDDING: False
+  SMAL_HEAD:
+    TYPE: new_bio_pose_transformer_decoder    # Use the newer version with SAM3D-style hierarchical updates
+    IN_CHANNELS: 1280
+    IEF_ITERS: 3
+    # Pose Transformer Decoder configuration
+    DECODER_DIM: 1280
+    NUM_DECODER_LAYERS: 6
+    NUM_HEADS: 8
+    MLP_RATIO: 4.0
+    # Keypoint token configuration specific to NewBioGuidedSMALPoseDecoder
+    USE_KEYPOINT_2D_TOKENS: True            # Enable 2D keypoint tokens with SAM3D-style dynamic updates
+    USE_KEYPOINT_3D_TOKENS: True            # Enable 3D keypoint tokens with pelvis normalization
+    KEYPOINT_TOKEN_UPDATE: True             # Enable hierarchical keypoint prediction and token updates
+    KP2D_INJECT_IMAGE_FEAT: True            # Key setting: inject image features via grid_sample
+DATASETS:
+  ANIMAL3D:
+    ROOT_IMAGE: ./datasets/animal3d/
+    JSON_FILE:
+      TRAIN: ./datasets/animal3d/train.json
+      TEST: ./datasets/animal3d/test.json
+    WEIGHT: 1.0

configs_hydra/experiment/primaStage2.yaml ADDED Viewed

	@@ -0,0 +1,113 @@

+# @package _global_
+defaults:
+  - default.yaml
+GENERAL:
+  TOTAL_STEPS: 450_000
+  LOG_STEPS: 533
+  VAL_STEPS: 533
+  VAL_EPOCHS: 1
+  CHECKPOINT_EPOCHS: 1
+  CHECKPOINT_SAVE_TOP_K: 2
+  NUM_WORKERS: 2
+  PREFETCH_FACTOR: 2
+LOSS_WEIGHTS:
+  KEYPOINTS_3D: 0.05
+  KEYPOINTS_2D: 0.01
+  INTERMEDIATE_KP2D: 0.001
+  INTERMEDIATE_KP3D: 0.001
+  GLOBAL_ORIENT: 0.005
+  POSE: 0.001
+  BETAS: 0.0005
+  TRANSL: 0.0005
+  ADVERSARIAL: 0.0
+  SUPCON: 0.0005
+TRAIN:
+  LR: 3.75e-6
+  WEIGHT_DECAY: 1e-4
+  BATCH_SIZE: 48
+  LOSS_REDUCTION: mean
+  NUM_TRAIN_SAMPLES: 2
+  NUM_TEST_SAMPLES: 64
+  POSE_2D_NOISE_RATIO: 0.01
+  SMPL_PARAM_NOISE_RATIO: 0.005
+MODEL:
+  IMAGE_SIZE: 256
+  IMAGE_MEAN: [0.485, 0.456, 0.406]
+  IMAGE_STD: [0.229, 0.224, 0.225]
+  BACKBONE:
+    TYPE: vith
+    PRETRAINED_WEIGHTS: ./data/amr_vitbb.pth
+    FREEZE: False
+  # Enable BioClip embedding
+  USE_BIOCLIP_EMBEDDING: True
+  BIOCLIP_EMBEDDING:
+    EMBED_DIM: 1280  # Match vit output dimension for token-wise concatenation
+    TYPE: bioclip1
+  # Enable 2D keypoint embedding
+  USE_KEYPOINT_EMBEDDING: False
+  KEYPOINT_EMBEDDING:
+    NUM_KEYPOINTS: 26        # Number of SMAL keypoints
+    KEYPOINT_DIM: 2          # 2D coordinates (x, y)
+    EMBED_DIM: 1280          # Match vit output dimension
+    HIDDEN_DIM: 512          # Hidden layer dimension in MLP
+    TYPE: 'token'            # Use token-based embedding (recommended)
+  SMAL_HEAD:
+    TYPE: new_bio_pose_transformer_decoder    # Use the newer version with SAM3D-style hierarchical updates
+    IN_CHANNELS: 1280
+    IEF_ITERS: 1
+    # Pose Transformer Decoder configuration
+    DECODER_DIM: 1280
+    NUM_DECODER_LAYERS: 6
+    NUM_HEADS: 8
+    MLP_RATIO: 4.0
+    # Keypoint token configuration specific to NewBioGuidedSMALPoseDecoder
+    USE_KEYPOINT_2D_TOKENS: True            # Enable 2D keypoint tokens with SAM3D-style dynamic updates
+    USE_KEYPOINT_3D_TOKENS: True            # Enable 3D keypoint tokens with pelvis normalization
+    KEYPOINT_TOKEN_UPDATE: True             # Enable hierarchical keypoint prediction and token updates
+    KP2D_INJECT_IMAGE_FEAT: True            # Key setting: inject image features via grid_sample
+    # Legacy transformer config (kept for compatibility)
+    TRANSFORMER_DECODER:
+      depth: 6
+      heads: 8
+      mlp_dim: 1024
+      dim_head: 64
+      dropout: 0.0
+      emb_dropout: 0.0
+      norm: layer
+      context_dim: 1280
+DATASETS:
+  ANIMAL3D:
+    ROOT_IMAGE: ./datasets/animal3d/
+    JSON_FILE:
+      TRAIN: ./datasets/animal3d/train.json
+      TEST: ./datasets/animal3d/test.json
+    WEIGHT: 1.0
+  CONTROL_ANIMAL3D:
+    ROOT_IMAGE: ./datasets/control_animal3dlatest/
+    JSON_FILE:
+      TRAIN: ./datasets/control_animal3dlatest/train.json
+      TEST: ./datasets/control_animal3dlatest/test.json
+    WEIGHT: 0.5
+  QUADRUPED2D:
+    ROOT_IMAGE: ./datasets/quadruped2d/
+    JSON_FILE:
+      TRAIN: ./datasets/quadruped2d/train.json
+      TEST: ./datasets/quadruped2d/test.json
+    WEIGHT: 0.15

configs_hydra/extras/default.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+# disable python warnings if they annoy you
+ignore_warnings: False
+# ask user for tags if none are provided in the config
+enforce_tags: True
+# pretty print config tree at the start of the run using Rich library
+print_config: True

configs_hydra/hydra/default.yaml ADDED Viewed

	@@ -0,0 +1,26 @@

+# @package _global_
+# https://hydra.cc/docs/configure_hydra/intro/
+# enable color logging
+defaults:
+  - override /hydra/hydra_logging: colorlog
+  - override /hydra/job_logging: colorlog
+# exp_name: ovrd_${hydra:job.override_dirname}
+exp_name: ${now:%Y-%m-%d}_${now:%H-%M-%S}
+hydra:
+  run:
+    dir: ${paths.log_dir}/${task_name}/runs/${exp_name}
+  sweep:
+    dir: ${paths.log_dir}/${task_name}/multiruns/${exp_name}
+    subdir: ${hydra.job.num}
+  job:
+    config:
+      override_dirname:
+        exclude_keys:
+          - trainer
+          - trainer.devices
+          - trainer.num_nodes
+          - callbacks
+          - debug

configs_hydra/launcher/local.yaml ADDED Viewed

	@@ -0,0 +1,13 @@

+# @package _global_
+defaults:
+  - override /hydra/launcher: submitit_local
+hydra:
+  launcher:
+    timeout_min: 10_080   # 7 days
+    nodes: 1
+    tasks_per_node: ${trainer.devices}
+    cpus_per_task: 8
+    gpus_per_node: ${trainer.devices}
+    name: amr

configs_hydra/launcher/slurm.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+# @package _global_
+defaults:
+  - override /hydra/launcher: submitit_slurm
+hydra:
+  launcher:
+    timeout_min: 10_080   # 7 days
+    max_num_timeout: 3
+    partition: g40
+    qos: idle
+    nodes: 1
+    tasks_per_node: ${trainer.devices}
+    gpus_per_task: null
+    cpus_per_task: 12
+    gpus_per_node: ${trainer.devices}
+    cpus_per_gpu: null
+    comment: prima
+    name: prima
+    setup:
+      - module load cuda openmpi libfabric-aws
+      - export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

configs_hydra/paths/default.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+# path to root directory
+# this requires PROJECT_ROOT environment variable to exist
+# PROJECT_ROOT is inferred and set by pyrootutils package in `train.py` and `eval.py`
+root_dir: ${oc.env:PROJECT_ROOT}
+# path to data directory
+data_dir: ${paths.root_dir}/data/
+# path to logging directory
+log_dir: logs/
+# path to output directory, created dynamically by hydra
+# path generation pattern is specified in `configs_hydra/hydra/default.yaml`
+# use it to store all files generated during the run, like ckpts and metrics
+output_dir: ${hydra:runtime.output_dir}
+# path to working directory
+work_dir: ${hydra:runtime.cwd}

configs_hydra/train.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+# @package _global_
+# specify here default configuration
+# order of defaults determines the order in which configs override each other
+defaults:
+  - _self_
+  - trainer: ddp.yaml
+  - paths: default.yaml
+  - extras: default.yaml
+  - hydra: default.yaml
+  # experiment configs allow for version control of specific hyperparameters
+  # e.g. best hyperparameters for given model and datamodule
+  - experiment: null
+  - texture_exp: null
+  # optional local config for machine/user specific settings
+  # it's optional since it doesn't need to exist and is excluded from version control
+  - optional launcher: local.yaml
+  # - optional launcher: slurm.yaml
+  # debugging config (enable through command line, e.g. `python train.py debug=default`)
+  - debug: null
+# task name, determines output directory path
+task_name: "train"
+# tags to help you identify your experiments
+# you can overwrite this in experiment configs
+# overwrite from command line with `python train.py tags="[first_tag, second_tag]"`
+# appending lists from command line is currently not supported :(
+# https://github.com/facebookresearch/hydra/issues/1547
+tags: ["dev"]
+# set False to skip model training
+train: True
+# evaluate on test set, using best model weights achieved during training
+# lightning chooses best weights based on the metric specified in checkpoint callback
+test: False
+# simply provide checkpoint path to resume training
+ckpt_path: True
+# seed for random number generators in pytorch, numpy and python.random
+seed: null

configs_hydra/trainer/cpu.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+defaults:
+  - default.yaml
+  - default_amr.yaml
+accelerator: cpu
+devices: 1

configs_hydra/trainer/ddp.yaml ADDED Viewed

	@@ -0,0 +1,14 @@

+defaults:
+  - default.yaml
+  - default_amr.yaml
+# use "ddp_spawn" instead of "ddp",
+# it's slower but normal "ddp" currently doesn't work ideally with hydra
+# https://github.com/facebookresearch/hydra/issues/2070
+# https://pytorch-lightning.readthedocs.io/en/latest/accelerators/gpu_intermediate.html#distributed-data-parallel-spawn
+strategy: ddp_spawn
+accelerator: gpu
+devices: 2
+num_nodes: 1
+sync_batchnorm: True

configs_hydra/trainer/default.yaml ADDED Viewed

	@@ -0,0 +1,10 @@

+_target_: pytorch_lightning.Trainer
+default_root_dir: ${paths.output_dir}
+accelerator: gpu
+devices: 1
+# set True to ensure deterministic results
+# makes training slower but gives more reproducibility than just setting seeds
+deterministic: False

configs_hydra/trainer/default_amr.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+num_sanity_val_steps: 0
+log_every_n_steps: ${GENERAL.LOG_STEPS}
+val_check_interval: ${GENERAL.VAL_STEPS}  # How often within one training epoch to check the validation set.
+check_val_every_n_epoch: ${GENERAL.VAL_EPOCHS}  # Check val every n train epochs.
+precision: 16-mixed  # 16-mixed, 32
+max_steps: ${GENERAL.TOTAL_STEPS}
+# move_metrics_to_cpu: True
+limit_val_batches: 80  # How much of validation dataset to check.
+# track_grad_norm: -1

configs_hydra/trainer/gpu.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+defaults:
+  - default.yaml
+  - default_amr.yaml
+accelerator: gpu
+devices: 1

configs_hydra/trainer/mps.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+defaults:
+  - default.yaml
+  - default_amr.yaml
+accelerator: mps
+devices: 1

demo.py ADDED Viewed

	@@ -0,0 +1,189 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+from pathlib import Path
+import detectron2.config
+import detectron2.engine
+import torch
+import argparse
+import os
+import cv2
+import numpy as np
+from tqdm import tqdm
+import torch.utils
+import torch.utils.data
+from prima.models import load_prima
+from prima.utils import recursive_to
+from prima.datasets.vitdet_dataset import ViTDetDataset, DEFAULT_MEAN, DEFAULT_STD
+from prima.utils.detection import select_animal_boxes
+from prima.utils.weights import DEFAULT_HF_REPO_ID, resolve_prima_checkpoint_path
+import detectron2
+from detectron2 import model_zoo
+import warnings
+warnings.filterwarnings("ignore")  # silence all Python warnings for the demo run
+LIGHT_BLUE = (0.65098039, 0.74117647, 0.85882353)  # colour passed to vertices_to_trimesh for exported meshes
+GREEN = (0.65, 0.86, 0.74)  # mesh_base_color used for the rendered overlays
+REPO_ROOT = Path(__file__).resolve().parent  # directory containing this script
+def load_renderer_components():
+    try:
+        from prima.utils.renderer import Renderer, cam_crop_to_full
+    except Exception as exc:
+        raise RuntimeError(
+            "Cannot initialize the PRIMA renderer. Rendering requires a working "
+            "pyrender/OpenGL backend such as EGL or OSMesa. Install the missing "
+            "OpenGL runtime for this environment, or run in an environment where "
+            "PYOPENGL_PLATFORM=egl/osmesa works."
+        ) from exc
+    return Renderer, cam_crop_to_full
+def main():
+    parser = argparse.ArgumentParser(description='prima demo code')
+    parser.add_argument('--checkpoint', type=str, default='',
+                        help='Path to pretrained model checkpoint. Empty -> auto-download the default Stage 1 checkpoint.')
+    parser.add_argument('--hf-repo-id', '--hf_repo_id', dest='hf_repo_id',
+                        type=str, default=os.environ.get("PRIMA_HF_REPO_ID", DEFAULT_HF_REPO_ID),
+                        help='Hugging Face repo ID containing PRIMA demo assets')
+    parser.add_argument('--no-auto-download', '--no_auto_download', dest='no_auto_download', action='store_true',
+                        help='Disable automatic download of missing PRIMA demo assets')
+    parser.add_argument('--img_folder', type=str, default='demo_data/', help='Folder with input images')
+    parser.add_argument('--out_folder', type=str, default='demo_out', help='Output folder to save rendered results')
+    parser.add_argument('--side_view', dest='side_view', action='store_true', default=False,
+                        help='If set, render side view also')
+    parser.add_argument('--save_mesh', dest='save_mesh', action='store_true', default=False,
+                        help='If set, save meshes to disk also')
+    parser.add_argument('--batch_size', type=int, default=1, help='Batch size for inference/fitting')
+    parser.add_argument('--file_type', nargs='+', default=['*.jpg', '*.png', '*.jpeg', '*.JPEG'],
+                        help='List of file extensions to consider')
+    args = parser.parse_args()
+    checkpoint_path = resolve_prima_checkpoint_path(
+        args.checkpoint,
+        data_dir=REPO_ROOT / "data",
+        auto_download=not args.no_auto_download,
+        hf_repo_id=args.hf_repo_id,
+    )
+    model, model_cfg = load_prima(checkpoint_path)
+    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+    model = model.to(device)
+    model.eval()
+    # Setup the renderer
+    Renderer, cam_crop_to_full = load_renderer_components()
+    renderer = Renderer(model_cfg, faces=model.smal.faces)
+    # Make output directory if it does not exist
+    os.makedirs(args.out_folder, exist_ok=True)
+    # Load detector
+    cfg = detectron2.config.get_cfg()
+    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
+    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
+    cfg.MODEL.WEIGHTS = "https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl"
+    cfg.MODEL.DEVICE = device.type
+    detector = detectron2.engine.DefaultPredictor(cfg)
+    img_paths = sorted([img for end in args.file_type for img in Path(args.img_folder).glob(end)])
+    num_readable_images = 0
+    num_rendered_results = 0
+    num_suppressed_detections = 0
+    for img_path in img_paths:
+        img_bgr = cv2.imread(str(img_path))
+        if img_bgr is None:
+            print(f"[WARN] Cannot read image: {img_path}")
+            continue
+        num_readable_images += 1
+        # Detect animals in image
+        det_out = detector(img_bgr)
+        det_instances = det_out['instances']
+        boxes, suppressed = select_animal_boxes(det_instances, score_threshold=0.7)
+        num_suppressed_detections += suppressed
+        if suppressed > 0:
+            print(f"[INFO] Suppressed {suppressed} duplicate animal detection(s) in {img_path}")
+        if len(boxes) == 0:
+            print(f"[INFO] No animal detected in {img_path}")
+            continue
+        # Run PRIMA on detected animals
+        dataset = ViTDetDataset(model_cfg, img_bgr, boxes)
+        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=0)
+        for batch in tqdm(dataloader):
+            batch = recursive_to(batch, device)
+            with torch.no_grad():
+                out = model(batch)
+            pred_cam = out['pred_cam']
+            box_center = batch["box_center"].float()
+            box_size = batch["box_size"].float()
+            img_size = batch["img_size"].float()
+            scaled_focal_length = model_cfg.EXTRA.FOCAL_LENGTH / model_cfg.MODEL.IMAGE_SIZE * img_size.max()
+            pred_cam_t_full = cam_crop_to_full(pred_cam, box_center, box_size, img_size,
+                                               scaled_focal_length).detach().cpu().numpy()
+            # Render the result
+            batch_size = batch['img'].shape[0]
+            for n in range(batch_size):
+                # Get filename from path img_path
+                img_fn, _ = os.path.splitext(os.path.basename(img_path))
+                animal_id = int(batch['animalid'][n])
+                white_img = (torch.ones_like(batch['img'][n]).cpu() - DEFAULT_MEAN[:, None, None] / 255) / (
+                            DEFAULT_STD[:, None, None] / 255)
+                input_patch = (batch['img'][n].cpu() * (DEFAULT_STD[:, None, None]) + (
+                            DEFAULT_MEAN[:, None, None])) / 255.
+                input_patch = input_patch.permute(1, 2, 0).numpy()
+                regression_img = renderer(out['pred_vertices'][n].detach().cpu().numpy(),
+                                        out['pred_cam_t'][n].detach().cpu().numpy(),
+                                        batch['img'][n],
+                                        mesh_base_color=GREEN,
+                                        scene_bg_color=(1, 1, 1),
+                                            )
+                final_img = np.concatenate([input_patch, regression_img], axis=1)
+                if args.side_view:
+                    side_img = renderer(out['pred_vertices'][n].detach().cpu().numpy(),
+                                        out['pred_cam_t'][n].detach().cpu().numpy(),
+                                        white_img,
+                                        mesh_base_color=GREEN,
+                                        scene_bg_color=(1, 1, 1),
+                                        side_view=True)
+                    final_img = np.concatenate([final_img, side_img], axis=1)
+                cv2.imwrite(os.path.join(args.out_folder, f'{img_fn}_{animal_id}.png'),
+                            cv2.cvtColor((255 * final_img).astype(np.uint8), cv2.COLOR_RGB2BGR))
+                num_rendered_results += 1
+                # Add all verts and cams to list
+                verts = out['pred_vertices'][n].detach().cpu().numpy()
+                cam_t = pred_cam_t_full[n]
+                # Save all meshes to disk
+                if args.save_mesh:
+                    camera_translation = cam_t.copy()
+                    tmesh = renderer.vertices_to_trimesh(verts, camera_translation, LIGHT_BLUE)
+                    tmesh.export(os.path.join(args.out_folder, f'{img_fn}_{animal_id}.obj'))
+    print(
+        f"[done] Demo complete. Processed {num_readable_images}/{len(img_paths)} image(s), "
+        f"saved {num_rendered_results} rendered result(s) to {args.out_folder}."
+    )
+    if num_suppressed_detections > 0:
+        print(f"[done] Suppressed {num_suppressed_detections} duplicate animal detection(s).")
+if __name__ == '__main__':  # run the demo only when executed as a script
+    main()

demo.sh ADDED Viewed

	@@ -0,0 +1,12 @@

+# Default PRIMA Stage 1 inference checkpoint:
+#   data/PRIMAS1/checkpoints/s1ckpt_inference.ckpt
+#
+# If this local file is missing, it will be downloaded from the PRIMA Hugging Face repo.
+# To use another local checkpoint instead, update this path.
+# For example: checkpoint='data/PRIMAS3/checkpoints/s3ckpt.ckpt'
+checkpoint='data/PRIMAS1/checkpoints/s1ckpt_inference.ckpt'
+python demo.py \
+  --checkpoint "${checkpoint}" \
+  --img_folder demo_data/ \
+  --out_folder demo_out/

demo_data/000000015956_horse.png ADDED Viewed

Git LFS Details

SHA256: 2a2398ba7df40a47c636afefa28be17b55f4b7bc2c378e053aeea507580ad2cb
Pointer size: 131 Bytes
Size of remote file: 620 kB

demo_data/000000315905_zebra.jpg ADDED Viewed

Git LFS Details

SHA256: e0a17e1f1650820b020a9025144015c1e27f0f1ab435859f0bde3a0047d8f689
Pointer size: 131 Bytes
Size of remote file: 257 kB

demo_data/beagle.jpg ADDED Viewed

Git LFS Details

SHA256: ac29e6ea6086831dd9806a8cd3fd608e264ac1af567f6fcfc8797c5bd3d5d560
Pointer size: 131 Bytes
Size of remote file: 350 kB

demo_data/n02101388_1188.png ADDED Viewed

Git LFS Details

SHA256: e45ff508fb8c6437cce22fcb59b4f1b6fe37ddfab1d4cf68d97629f9caa939f4
Pointer size: 131 Bytes
Size of remote file: 319 kB

demo_data/n02412080_12159.png ADDED Viewed

Git LFS Details

SHA256: 03273c57e8b25b258d3eb96af7b4f77b43b5c40be90da83c21875f3322b487f1
Pointer size: 131 Bytes
Size of remote file: 347 kB

demo_data/shepherd_hati.jpg ADDED Viewed

Git LFS Details

SHA256: 65c5878203bc3165dda9011ebfce77cc7d930daed0a215396d8036509d1963c1
Pointer size: 131 Bytes
Size of remote file: 210 kB

demo_tta.py ADDED Viewed

	@@ -0,0 +1,399 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+"""
+demo_tta.py: PRIMA inference with fine-tuned DeepLabCut SuperAnimal TTA
+Pipeline:
+1. Run Detectron2 to detect animals in the input image.
+2. Run PRIMA on each detected animal to obtain 3D pose/shape estimation.
+3. Run a fine-tuned DeepLabCut SuperAnimal pose model (Animal3D 26-joint
+   layout) to obtain 2D keypoints already in PRIMA topology. The fine-tuned
+   snapshot is wired into DLC's
+   ``superanimal_analyze_images`` via the ``customized_pose_checkpoint``
+   and ``customized_model_config`` kwargs.
+4. Run test-time adaptation (TTA) with user-specified lr and num_iters
+   to further optimize the 3D pose and shape estimation.
+5. Render and save before/after TTA results (PNG + OBJ) and the
+   26-keypoint visualization (PNG).
+"""
+from pathlib import Path
+import argparse
+import copy
+import os
+import tempfile
+import warnings
+import cv2
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torch.utils.data
+from tqdm import tqdm
+from prima.models import load_prima
+from prima.utils import recursive_to
+from prima.datasets.vitdet_dataset import ViTDetDataset, DEFAULT_MEAN, DEFAULT_STD
+from prima.utils.detection import ANIMAL_COCO_IDS, select_animal_boxes
+from prima.utils.weights import DEFAULT_HF_REPO_ID, resolve_prima_checkpoint_path
+warnings.filterwarnings("ignore")  # silence all Python warnings for the demo run
+LIGHT_BLUE = (0.65098039, 0.74117647, 0.85882353)  # colour passed to vertices_to_trimesh for exported meshes
+GREEN = (0.65, 0.86, 0.74)  # mesh_base_color used for the rendered overlays
+REPO_ROOT = Path(__file__).resolve().parent  # directory containing this script
+def load_renderer_components():
+    try:
+        from prima.utils.renderer import Renderer, cam_crop_to_full
+    except Exception as exc:
+        raise RuntimeError(
+            "Cannot initialize the PRIMA renderer. Rendering requires a working "
+            "pyrender/OpenGL backend such as EGL or OSMesa. Install the missing "
+            "OpenGL runtime for this environment, or run in an environment where "
+            "PYOPENGL_PLATFORM=egl/osmesa works."
+        ) from exc
+    return Renderer, cam_crop_to_full
+def denorm_patch_to_rgb(img_tensor: torch.Tensor) -> np.ndarray:
+    patch = (img_tensor.detach().cpu() * (DEFAULT_STD[:, None, None]) + DEFAULT_MEAN[:, None, None]) / 255.0
+    patch = patch.permute(1, 2, 0).numpy()
+    return np.clip(patch, 0.0, 1.0)
+def save_keypoint_vis(patch_rgb: np.ndarray, kpts_xyc: np.ndarray, save_path: str) -> None:
+    vis = cv2.cvtColor((patch_rgb * 255).astype(np.uint8), cv2.COLOR_RGB2BGR).copy()
+    num_kpts = len(kpts_xyc)
+    for i, (x, y, c) in enumerate(kpts_xyc):
+        if c <= 0:
+            continue
+        # Use distinct color for each keypoint (OpenCV uses BGR)
+        hue = int(179 * i / max(1, num_kpts - 1))
+        color_bgr = cv2.cvtColor(np.uint8([[[hue, 255, 255]]]), cv2.COLOR_HSV2BGR)[0, 0]
+        color_bgr = (int(color_bgr[0]), int(color_bgr[1]), int(color_bgr[2]))
+        cx, cy = int(round(float(x))), int(round(float(y)))
+        cv2.circle(vis, (cx, cy), 3, color_bgr, -1)
+        cv2.putText(vis, str(i), (cx + 3, cy - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1, cv2.LINE_AA)
+    cv2.imwrite(save_path, vis)
+def resolve_sa_weights_path(local_path: str) -> str:
+    """Return a local path to the fine-tuned SuperAnimal .pt snapshot.
+    If ``local_path`` is empty, downloads ``sa_finetune_hrnet_w32.pt`` from the
+    ``MLAdaptiveIntelligence/FMPose3D`` Hugging Face repo (cached under
+    ``~/.cache/huggingface``).
+    """
+    if local_path:
+        return local_path
+    try:
+        from huggingface_hub import hf_hub_download
+    except ImportError:
+        raise ImportError(
+            "huggingface_hub is required to auto-download the fine-tuned "
+            "SuperAnimal weights. Install with `pip install huggingface_hub`, "
+            "or pass --saved_2d_model_path with a local .pt file."
+        ) from None
+    repo_id = "MLAdaptiveIntelligence/FMPose3D"
+    filename = "sa_finetune_hrnet_w32.pt"
+    try:
+        cached_path = hf_hub_download(repo_id=repo_id, filename=filename, local_files_only=True)
+    except Exception:
+        print(f"No --saved_2d_model_path provided; downloading '{filename}' from {repo_id}...")
+        return hf_hub_download(repo_id=repo_id, filename=filename)
+    print(f"Using cached SuperAnimal weights: {cached_path}")
+    return cached_path
+def run_superanimal_on_patch(patch_rgb: np.ndarray, args, tmp_dir: str):
+    """Predict 26-joint 2D keypoints on a single PRIMA patch using a
+    fine-tuned DeepLabCut SuperAnimal snapshot.
+    Returns an ``(26, 3)`` array of ``(x, y, confidence)`` in patch
+    pixel coordinates, or ``None`` if no individual was detected.
+    """
+    try:
+        from deeplabcut.pose_estimation_pytorch.apis import superanimal_analyze_images
+    except Exception as e:
+        raise RuntimeError(
+            "Cannot import DeepLabCut SuperAnimal API. Please install deeplabcut with pose_estimation_pytorch support."
+        ) from e
+    patch_path = os.path.join(tmp_dir, "patch.png")
+    cv2.imwrite(patch_path, cv2.cvtColor((patch_rgb * 255).astype(np.uint8), cv2.COLOR_RGB2BGR))
+    dlc_device = "cuda" if torch.cuda.is_available() else "cpu"
+    preds = superanimal_analyze_images(
+        superanimal_name=args.superanimal_name,
+        model_name=args.superanimal_model_name,
+        detector_name=args.superanimal_detector_name,
+        images=patch_path,
+        max_individuals=args.superanimal_max_individuals,
+        out_folder=tmp_dir,
+        device=dlc_device,
+        customized_model_config=args.pytorch_config_2d_path,
+        customized_pose_checkpoint=args.saved_2d_model_path,
+    )
+    payload = preds.get(patch_path, None)
+    if payload is None:
+        return None
+    bodyparts = payload.get("bodyparts", None)
+    if bodyparts is None or len(bodyparts) == 0:
+        return None
+    best_idx = int(np.argmax(bodyparts[..., 2].mean(axis=1)))
+    return bodyparts[best_idx].astype(np.float32)
+def render_and_save(renderer, cam_crop_to_full_fn, out, batch, img_fn, animal_id, out_folder, suffix, side_view, save_mesh):
+    pred_cam = out['pred_cam']
+    box_center = batch['box_center'].float()
+    box_size = batch['box_size'].float()
+    img_size = batch['img_size'].float()
+    scaled_focal_length = batch['focal_length'][0, 0] / batch['img'].shape[-1] * img_size.max()
+    pred_cam_t_full = cam_crop_to_full_fn(pred_cam, box_center, box_size, img_size, scaled_focal_length)
+    white_img = (torch.ones_like(batch['img'][0]).cpu() - DEFAULT_MEAN[:, None, None] / 255) / (
+        DEFAULT_STD[:, None, None] / 255
+    )
+    input_patch = denorm_patch_to_rgb(batch['img'][0])
+    regression_img = renderer(
+        out['pred_vertices'][0].detach().cpu().numpy(),
+        out['pred_cam_t'][0].detach().cpu().numpy(),
+        batch['img'][0],
+        mesh_base_color=GREEN,
+        scene_bg_color=(1, 1, 1),
+    )
+    final_img = np.concatenate([input_patch, regression_img], axis=1)
+    if side_view:
+        side_img = renderer(
+            out['pred_vertices'][0].detach().cpu().numpy(),
+            out['pred_cam_t'][0].detach().cpu().numpy(),
+            white_img,
+            mesh_base_color=GREEN,
+            scene_bg_color=(1, 1, 1),
+            side_view=True,
+        )
+        final_img = np.concatenate([final_img, side_img], axis=1)
+    cv2.imwrite(
+        os.path.join(out_folder, f'{img_fn}_{animal_id}_{suffix}.png'),
+        cv2.cvtColor((255 * final_img).astype(np.uint8), cv2.COLOR_RGB2BGR),
+    )
+    if save_mesh:
+        verts = out['pred_vertices'][0].detach().cpu().numpy()
+        cam_t = pred_cam_t_full[0].detach().cpu().numpy()
+        tmesh = renderer.vertices_to_trimesh(verts, cam_t.copy(), LIGHT_BLUE)
+        tmesh.export(os.path.join(out_folder, f'{img_fn}_{animal_id}_{suffix}.obj'))
+def tta_optimize(model, batch, gt_kpts_norm, num_iters, lr):
+    model.eval()
+    if hasattr(model, 'backbone'):
+        for p in model.backbone.parameters():
+            p.requires_grad = False
+    orig_smal_head_state = copy.deepcopy(model.smal_head.state_dict())
+    model.smal_head.freeze_except_regression_heads()
+    tta_params = model.smal_head.get_tta_parameters(mode='all')
+    optimizer = torch.optim.Adam(tta_params, lr=lr)
+    valid_mask = (gt_kpts_norm[..., 2] > 0).float().unsqueeze(-1)
+    gt_xy = gt_kpts_norm[..., :2]
+    for _ in range(num_iters):
+        optimizer.zero_grad()
+        out = model(batch)
+        pred_xy = out['pred_keypoints_2d']
+        loss = F.mse_loss(pred_xy * valid_mask, gt_xy * valid_mask, reduction='sum') / (valid_mask.sum() + 1e-6)
+        loss.backward()
+        optimizer.step()
+    with torch.no_grad():
+        out_after = model(batch)
+    model.smal_head.load_state_dict(orig_smal_head_state)
+    model.smal_head.unfreeze_all()
+    return out_after
+def main():
+    parser = argparse.ArgumentParser(description='PRIMA + SuperAnimal + TTA demo')
+    parser.add_argument('--checkpoint', type=str, default='',
+                        help='Path to pretrained PRIMA checkpoint. Empty -> auto-download the default Stage 1 checkpoint.')
+    parser.add_argument('--hf-repo-id', '--hf_repo_id', dest='hf_repo_id',
+                        type=str, default=os.environ.get("PRIMA_HF_REPO_ID", DEFAULT_HF_REPO_ID),
+                        help='Hugging Face repo ID containing PRIMA demo assets')
+    parser.add_argument('--no-auto-download', '--no_auto_download', dest='no_auto_download', action='store_true',
+                        help='Disable automatic download of missing PRIMA demo assets')
+    parser.add_argument('--img_path', type=str, default=None, help='Single image path')
+    parser.add_argument('--img_folder', type=str, default='demo_data/', help='Folder with input images')
+    parser.add_argument('--out_folder', type=str, default='demo_out_tta', help='Output folder')
+    parser.add_argument('--side_view', dest='side_view', action='store_true', default=False, help='Render side view')
+    parser.add_argument('--save_mesh', dest='save_mesh', action='store_true', default=False, help='Save meshes')
+    parser.add_argument('--file_type', nargs='+', default=['*.jpg', '*.png', '*.jpeg', '*.JPEG'], help='Image globs')
+    parser.add_argument('--det_thresh', type=float, default=0.7, help='Detectron2 score threshold for animals')
+    parser.add_argument('--tta_lr', type=float, default=1e-6, help='TTA learning rate')
+    parser.add_argument('--tta_num_iters', type=int, default=30, help='TTA iterations')
+    parser.add_argument('--kp_conf_thresh', type=float, default=0.1, help='Keypoint confidence threshold')
+    parser.add_argument('--superanimal_name', type=str, default='superanimal_quadruped')
+    parser.add_argument('--superanimal_model_name', type=str, default='hrnet_w32')
+    parser.add_argument('--superanimal_detector_name', type=str, default='fasterrcnn_resnet50_fpn_v2')
+    parser.add_argument('--superanimal_max_individuals', type=int, default=1)
+    parser.add_argument('--saved_2d_model_path', type=str, default='',
+                        help='Path to the fine-tuned SuperAnimal 26-joint .pt snapshot. '
+                             'Empty -> auto-download sa_finetune_hrnet_w32.pt from '
+                             'MLAdaptiveIntelligence/FMPose3D on Hugging Face Hub.')
+    parser.add_argument('--pytorch_config_2d_path', type=str,
+                        default=str(Path(__file__).resolve().parent / 'configs' / 'sa_finetune_hrnet_w32.yaml'),
+                        help='Path to the DLC pytorch config yaml for the fine-tuned snapshot. '
+                             'Defaults to the bundled configs/sa_finetune_hrnet_w32.yaml.')
+    args = parser.parse_args()
+    checkpoint_path = resolve_prima_checkpoint_path(
+        args.checkpoint,
+        data_dir=REPO_ROOT / "data",
+        auto_download=not args.no_auto_download,
+        hf_repo_id=args.hf_repo_id,
+    )
+    args.saved_2d_model_path = resolve_sa_weights_path(args.saved_2d_model_path)
+    model, model_cfg = load_prima(checkpoint_path)
+    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+    model = model.to(device)
+    model.eval()
+    Renderer, cam_crop_to_full_fn = load_renderer_components()
+    renderer = Renderer(model_cfg, faces=model.smal.faces)
+    os.makedirs(args.out_folder, exist_ok=True)
+    import detectron2.config
+    import detectron2.engine
+    from detectron2 import model_zoo
+    cfg = detectron2.config.get_cfg()
+    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
+    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
+    cfg.MODEL.WEIGHTS = "https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl"
+    cfg.MODEL.DEVICE = device.type
+    detector = detectron2.engine.DefaultPredictor(cfg)
+    if args.img_path is not None:
+        img_paths = [Path(args.img_path)]
+    else:
+        img_paths = sorted([img for end in args.file_type for img in Path(args.img_folder).glob(end)])
+    for img_path in img_paths:
+        img_bgr = cv2.imread(str(img_path))
+        if img_bgr is None:
+            print(f"[WARN] Cannot read image: {img_path}")
+            continue
+        det_out = detector(img_bgr)
+        det_instances = det_out['instances']
+        boxes, suppressed = select_animal_boxes(
+            det_instances,
+            animal_class_ids=ANIMAL_COCO_IDS,
+            score_threshold=args.det_thresh,
+        )
+        if suppressed > 0:
+            print(f"[INFO] Suppressed {suppressed} duplicate animal detection(s) in {img_path}")
+        if len(boxes) == 0:
+            print(f"[INFO] No animal detected in {img_path}")
+            continue
+        dataset = ViTDetDataset(model_cfg, img_bgr, boxes)
+        dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
+        for batch in tqdm(dataloader, desc=f"{img_path.name}"):
+            batch = recursive_to(batch, device)
+            with torch.no_grad():
+                out_before = model(batch)
+            img_fn = img_path.stem
+            animal_id = int(batch['animalid'][0])
+            render_and_save(
+                renderer,
+                cam_crop_to_full_fn,
+                out_before,
+                batch,
+                img_fn,
+                animal_id,
+                args.out_folder,
+                suffix='before_tta',
+                side_view=args.side_view,
+                save_mesh=args.save_mesh,
+            )
+            patch_rgb = denorm_patch_to_rgb(batch['img'][0])
+            with tempfile.TemporaryDirectory(prefix=f"dlc_{img_fn}_{animal_id}_") as tmp_dir:
+                kpts_xyc = run_superanimal_on_patch(patch_rgb, args, tmp_dir)
+            if kpts_xyc is None:
+                print(f"[WARN] No SuperAnimal keypoints for {img_fn}_{animal_id}, skip TTA")
+                continue
+            kpts_xyc[kpts_xyc[:, 2] < args.kp_conf_thresh, 2] = 0.0
+            save_keypoint_vis(
+                patch_rgb,
+                kpts_xyc,
+                os.path.join(args.out_folder, f"{img_fn}_{animal_id}_prima26_kpts.png"),
+            )
+            np.save(os.path.join(args.out_folder, f"{img_fn}_{animal_id}_prima26_kpts.npy"), kpts_xyc)
+            patch_h, patch_w = patch_rgb.shape[:2]
+            kpts_norm = kpts_xyc.copy()
+            kpts_norm[:, 0] = kpts_norm[:, 0] / float(patch_w) - 0.5
+            kpts_norm[:, 1] = kpts_norm[:, 1] / float(patch_h) - 0.5
+            gt_kpts_norm = torch.from_numpy(kpts_norm[None]).to(device=device, dtype=batch['img'].dtype)
+            out_after = tta_optimize(
+                model,
+                batch,
+                gt_kpts_norm,
+                num_iters=args.tta_num_iters,
+                lr=args.tta_lr,
+            )
+            render_and_save(
+                renderer,
+                cam_crop_to_full_fn,
+                out_after,
+                batch,
+                img_fn,
+                animal_id,
+                args.out_folder,
+                suffix='after_tta',
+                side_view=args.side_view,
+                save_mesh=args.save_mesh,
+            )
+if __name__ == '__main__':  # run the demo only when executed as a script
+    main()

demo_tta.sh ADDED Viewed

	@@ -0,0 +1,15 @@

+# Empty checkpoint uses the default PRIMA Stage 1 inference checkpoint:
+#   data/PRIMAS1/checkpoints/s1ckpt_inference.ckpt
+#
+# This standard path is auto-downloaded from the PRIMA Hugging Face repo if missing.
+# To use another local checkpoint instead, update this path.
+# For example: checkpoint='data/PRIMAS3/checkpoints/s3ckpt.ckpt'
+checkpoint='data/PRIMAS1/checkpoints/s1ckpt_inference.ckpt'
+python3 demo_tta.py \
+  --checkpoint "${checkpoint}" \
+  --img_folder demo_data/ \
+  --out_folder demo_out_tta/ \
+  --tta_lr 1e-6 \
+  --tta_num_iters 30

eval.py ADDED Viewed

	@@ -0,0 +1,103 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+import numpy as np
+from tqdm import tqdm
+import torch
+from prima.utils import recursive_to
+from prima.utils.evaluate_metric import Evaluator
+from prima.datasets.datasets import EvaluationDataset
+import argparse
+from torch.utils.data import DataLoader
+from prima.models.prima import PRIMA
+from prima.configs import get_config
+torch.multiprocessing.set_sharing_strategy('file_system')  # NOTE(review): presumably avoids file-descriptor exhaustion with DataLoader workers — confirm
+def main(args):
+    cfg = get_config(args.config)
+    default_cfg = get_config(args.default_eval_config)
+    model = PRIMA.load_from_checkpoint(args.checkpoint, cfg=cfg, strict=False)
+    model.eval()
+    model = model.to(args.device)
+    smal_evaluator = Evaluator(smal_model=model.smal, image_size=cfg.MODEL.IMAGE_SIZE)
+    cfg_eval_dataset = dict(default_cfg.DATASETS)
+    aug_cfg = cfg_eval_dataset.pop("CONFIG", None)  # augmentation config is not used in evaluation
+    if args.dataset.upper() == "ALL":
+        for key in cfg_eval_dataset.keys():
+            print(f"-------- Evaluate {key} dataset ------------")
+            eval_one_dataset(cfg_eval_dataset[key], default_cfg, cfg, model,
+                             evaluator=smal_evaluator,
+                             aug_cfg=aug_cfg,
+                             key=key,
+                             device=args.device)
+            print(f"-------{key} Dataset evaluate finish ------")
+    else:
+        print(f"-------- Evaluate {args.dataset} dataset ------------")
+        eval_one_dataset(cfg_eval_dataset[args.dataset], default_cfg, cfg, model,
+                         evaluator=smal_evaluator,
+                         aug_cfg=aug_cfg,
+                         key=args.dataset,
+                         device=args.device)
+        print(f"-------{args.dataset} Dataset evaluate finish ------")
+def eval_one_dataset(dataset_cfg, default_cfg, cfg, model, evaluator, aug_cfg, key, device='cuda'):
+    dataset = EvaluationDataset(root_image=dataset_cfg['ROOT_IMAGE'],
+                                json_file=dataset_cfg['JSON_FILE']['TEST'],
+                                augm_config=aug_cfg, focal_length=cfg.SMAL.get("FOCAL_LENGTH", 1000),
+                                image_size=cfg.MODEL.IMAGE_SIZE,
+                                )
+    dataloader = DataLoader(dataset, batch_size=1, num_workers=cfg.GENERAL.NUM_WORKERS)
+    bar = tqdm(dataloader)
+    pa_mpjpe_list, pck_list, auc_list, pa_mpvpe_list = [], [], [], []
+    for i, batch in enumerate(bar):
+        batch = recursive_to(batch, device)
+        with torch.no_grad():
+            output = model(batch)
+        if key in ["ANIMAL3D", "CONTROL_ANIMAL3D"]:
+            pa_mpjpe, pa_mpvpe = evaluator.eval_3d(output, batch)
+        else:
+            pa_mpjpe, pa_mpvpe = 0., 0.
+        pck, auc = evaluator.eval_2d(output, batch, pck_threshold=default_cfg.METRIC.PCK_THRESHOLD)
+        pa_mpjpe_list.append(pa_mpjpe)
+        pa_mpvpe_list.append(pa_mpvpe)
+        auc_list.append(auc)
+        pck_list.append(pck)
+        bar.set_postfix(PA_MPJPE=pa_mpjpe,
+                        PA_MPVPE=pa_mpvpe,
+                        AUC=auc,
+                        pck=pck,)
+    print("---------------- 3D metric -----------------")
+    print(f"Avg PA-MPJPE: {np.mean(pa_mpjpe_list)}")
+    print(f"Avg PA-MPVPE: {np.mean(pa_mpvpe_list)}")
+    print("--------------- 2D metric ------------------")
+    print(f"AUC: {np.mean(auc_list)}")
+    pck_list = np.array(pck_list)
+    for _, th in enumerate(default_cfg.METRIC.PCK_THRESHOLD):
+        print(f"PCK@{th}: {np.mean(pck_list[:, _])}")
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", type=str, help="Path to config file", required=True)
+    parser.add_argument("--checkpoint", type=str, help="Path to checkpoint file", required=True)
+    parser.add_argument("--default_eval_config", type=str, default="./configs_hydra/experiment/default_val.yaml")
+    parser.add_argument("--dataset", type=str, default="ALL")
+    parser.add_argument("--device", type=str, default="cuda", help="Device to use for evaluation")
+    args = parser.parse_args()
+    main(args)

images/teaser.png ADDED Viewed

Git LFS Details

SHA256: a617ca4fd37de03e2db4ccf397ce9841ed32c3fe18c766c4832d41af574ad746
Pointer size: 132 Bytes
Size of remote file: 4.29 MB

packages.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+libosmesa6
+libgl1
+libegl1
+libgles2

prima/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+"""Top-level package for PRIMA.
+This package contains models, datasets and utilities for
+3D animal pose and shape estimation.
+"""
+from importlib.metadata import PackageNotFoundError, version
+try:  # pragma: no cover - best effort during development
+	__version__ = version("prima-animal")
+except PackageNotFoundError:  # pragma: no cover
+	__version__ = "0.0.0"
+__all__ = ["__version__"]

prima/configs/__init__.py ADDED Viewed

	@@ -0,0 +1,99 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+from typing import Dict
+from yacs.config import CfgNode as CN
+def to_lower(x: Dict) -> Dict:
+    """
+    Convert all dictionary keys to lowercase
+    Args:
+      x (dict): Input dictionary
+    Returns:
+      dict: Output dictionary with all keys converted to lowercase
+    """
+    return {k.lower(): v for k, v in x.items()}
+# Default configuration tree. Every node is created with new_allowed=True
+# (yacs: keys that are not declared here may be added when a config is merged in).
+_C = CN(new_allowed=True)
+# --- GENERAL: run-level settings (resuming, logging/checkpoint cadence, hardware, data loading) ---
+_C.GENERAL = CN(new_allowed=True)
+_C.GENERAL.RESUME = True
+_C.GENERAL.TIME_TO_RUN = 3300
+_C.GENERAL.VAL_STEPS = 100
+_C.GENERAL.LOG_STEPS = 100
+_C.GENERAL.CHECKPOINT_STEPS = 20000
+_C.GENERAL.CHECKPOINT_DIR = "checkpoints"
+_C.GENERAL.SUMMARY_DIR = "tensorboard"
+_C.GENERAL.NUM_GPUS = 1
+_C.GENERAL.NUM_WORKERS = 4
+_C.GENERAL.MIXED_PRECISION = True
+_C.GENERAL.ALLOW_CUDA = True
+_C.GENERAL.PIN_MEMORY = False
+_C.GENERAL.DISTRIBUTED = False
+_C.GENERAL.LOCAL_RANK = 0
+_C.GENERAL.USE_SYNCBN = False
+_C.GENERAL.WORLD_SIZE = 1
+_C.GENERAL.PREFETCH_FACTOR = 2
+# --- TRAIN: training-loop settings ---
+_C.TRAIN = CN(new_allowed=True)
+_C.TRAIN.NUM_EPOCHS = 100
+_C.TRAIN.SHUFFLE = True
+_C.TRAIN.WARMUP = False
+_C.TRAIN.NORMALIZE_PER_IMAGE = False
+_C.TRAIN.CLIP_GRAD = False
+_C.TRAIN.CLIP_GRAD_VALUE = 1.0
+# --- Sections declared empty here; their keys come from the merged config file ---
+_C.LOSS_WEIGHTS = CN(new_allowed=True)
+_C.DATASETS = CN(new_allowed=True)
+# --- MODEL: only the input image size has a default ---
+_C.MODEL = CN(new_allowed=True)
+_C.MODEL.IMAGE_SIZE = 224
+# --- EXTRA ---
+_C.EXTRA = CN(new_allowed=True)
+_C.EXTRA.FOCAL_LENGTH = 5000
+# --- DATASETS.CONFIG: augmentation parameters (scale/rotation/translation/colour/flip/extreme crop) ---
+_C.DATASETS.CONFIG = CN(new_allowed=True)
+_C.DATASETS.CONFIG.SCALE_FACTOR = 0.3
+_C.DATASETS.CONFIG.ROT_FACTOR = 30
+_C.DATASETS.CONFIG.TRANS_FACTOR = 0.02
+_C.DATASETS.CONFIG.COLOR_SCALE = 0.2
+_C.DATASETS.CONFIG.ROT_AUG_RATE = 0.6
+_C.DATASETS.CONFIG.TRANS_AUG_RATE = 0.5
+_C.DATASETS.CONFIG.DO_FLIP = False
+_C.DATASETS.CONFIG.FLIP_AUG_RATE = 0.5
+_C.DATASETS.CONFIG.EXTREME_CROP_AUG_RATE = 0.10
+def default_config() -> CN:
+    """
+    Get a yacs CfgNode object with the default config values.
+    """
+    # Return a clone so that the defaults will not be altered
+    # This is for the "local variable" use pattern
+    return _C.clone()
+def get_config(config_file: str, merge: bool = True) -> CN:
+    """
+    Read a config file and optionally merge it with the default config file.
+    Args:
+      config_file (str): Path to config file.
+      merge (bool): Whether to merge with the default config or not.
+    Returns:
+      CfgNode: Config as a yacs CfgNode object.
+    """
+    if merge:
+        cfg = default_config()
+    else:
+        cfg = CN(new_allowed=True)
+    cfg.merge_from_file(config_file)
+    cfg.freeze()
+    return cfg

prima/models/__init__.py ADDED Viewed

	@@ -0,0 +1,54 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+from .prima import PRIMA
+def load_prima(checkpoint_path):
+    from pathlib import Path
+    from ..configs import get_config
+    model_cfg = str(Path(checkpoint_path).parent.parent / '.hydra/config.yaml')
+    model_cfg = get_config(model_cfg)
+    # Override some config values, to crop bbox correctly
+    if (model_cfg.MODEL.BACKBONE.TYPE == 'vit') and ('BBOX_SHAPE' not in model_cfg.MODEL):
+        model_cfg.defrost()
+        assert model_cfg.MODEL.IMAGE_SIZE == 256, f"MODEL.IMAGE_SIZE ({model_cfg.MODEL.IMAGE_SIZE}) should be 256 for ViT backbone"
+        model_cfg.MODEL.BBOX_SHAPE = [192, 256]
+        model_cfg.freeze()
+    if (model_cfg.MODEL.BACKBONE.TYPE == 'dinov3') and ('BBOX_SHAPE' not in model_cfg.MODEL):
+        model_cfg.defrost()
+        assert model_cfg.MODEL.IMAGE_SIZE == 256, f"MODEL.IMAGE_SIZE ({model_cfg.MODEL.IMAGE_SIZE}) should be 256 for dino backbone"
+        model_cfg.MODEL.BBOX_SHAPE = [256, 256]
+        model_cfg.freeze()
+    if (model_cfg.MODEL.BACKBONE.TYPE == 'dinov2') and ('BBOX_SHAPE' not in model_cfg.MODEL):
+        model_cfg.defrost()
+        assert model_cfg.MODEL.IMAGE_SIZE == 252, f"MODEL.IMAGE_SIZE ({model_cfg.MODEL.IMAGE_SIZE}) should be 252 for dino backbone"
+        model_cfg.MODEL.BBOX_SHAPE = [252, 252]
+        model_cfg.freeze()
+    # Update config to be compatible with demo
+    if ('PRETRAINED_WEIGHTS' in model_cfg.MODEL.BACKBONE):
+        model_cfg.defrost()
+        model_cfg.MODEL.BACKBONE.pop('PRETRAINED_WEIGHTS')
+        model_cfg.freeze()
+    # Offscreen training renderer is not needed for demo/inference startup and
+    # can fail on some local OpenGL backends.
+    model = PRIMA.load_from_checkpoint(
+        checkpoint_path,
+        strict=False,
+        cfg=model_cfg,
+        map_location='cpu',
+        init_renderer=False,
+    )
+    return model, model_cfg

prima/models/backbones/__init__.py ADDED Viewed

	@@ -0,0 +1,19 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+from .vit import vith
+def create_backbone(cfg):
+    if cfg.MODEL.BACKBONE.TYPE in ['vith','concat','aa']:   # vit bb will be used in these three cases - animal feature extractor
+        return vith(cfg)
+    else:
+        raise NotImplementedError('Backbone type is not implemented')

prima/models/backbones/vit.py ADDED Viewed

	@@ -0,0 +1,375 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+import torch
+from functools import partial
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+from timm.layers import drop_path, to_2tuple, trunc_normal_
+def vith(cfg):
+    return ViT(
+        img_size=(256, 192),
+        patch_size=16,
+        embed_dim=1280,
+        depth=32,
+        num_heads=16,
+        ratio=1,
+        use_checkpoint=False,
+        # use_checkpoint=True,
+        mlp_ratio=4,
+        qkv_bias=True,
+        drop_path_rate=0.55,
+        use_cls=True, # cls for animal family classification
+    )
+def get_abs_pos(abs_pos, h, w, ori_h, ori_w, has_cls_token=True):
+    """
+    Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token
+        dimension for the original embeddings.
+    Args:
+        abs_pos (Tensor): absolute positional embeddings with (1, num_position, C).
+        has_cls_token (bool): If true, has 1 embedding in abs_pos for cls token.
+        hw (Tuple): size of input image tokens.
+    Returns:
+        Absolute positional embeddings after processing with shape (1, H, W, C)
+    """
+    cls_token = None
+    B, L, C = abs_pos.shape
+    if has_cls_token:
+        cls_token = abs_pos[:, 0:1]
+        abs_pos = abs_pos[:, 1:]
+    if ori_h != h or ori_w != w:
+        new_abs_pos = F.interpolate(
+            abs_pos.reshape(1, ori_h, ori_w, -1).permute(0, 3, 1, 2),
+            size=(h, w),
+            mode="bicubic",
+            align_corners=False,
+        ).permute(0, 2, 3, 1).reshape(B, -1, C)
+    else:
+        new_abs_pos = abs_pos
+    if cls_token is not None:
+        new_abs_pos = torch.cat([cls_token, new_abs_pos], dim=1)
+    return new_abs_pos
+class DropPath(nn.Module):
+    """Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
+    """
+    def __init__(self, drop_prob=None):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training)
+    def extra_repr(self):
+        return 'p={}'.format(self.drop_prob)
+class Mlp(nn.Module):
+    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+class Attention(nn.Module):
+    def __init__(
+            self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
+            proj_drop=0., attn_head_dim=None):
+        super().__init__()
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.dim = dim
+        if attn_head_dim is not None:
+            head_dim = attn_head_dim
+        all_head_dim = head_dim * self.num_heads
+        self.scale = qk_scale or head_dim ** -0.5
+        self.qkv = nn.Linear(dim, all_head_dim * 3, bias=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(all_head_dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+    def forward(self, x):
+        B, N, C = x.shape
+        qkv = self.qkv(x)
+        qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+        q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)
+        q = q * self.scale
+        attn = (q @ k.transpose(-2, -1))
+        attn = attn.softmax(dim=-1)
+        attn = self.attn_drop(attn)
+        x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+class Block(nn.Module):
+    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None,
+                 drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
+                 norm_layer=nn.LayerNorm, attn_head_dim=None,
+                 ):
+        super().__init__()
+        self.norm1 = norm_layer(dim)
+        self.attn = Attention(
+            dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+            attn_drop=attn_drop, proj_drop=drop, attn_head_dim=attn_head_dim
+        )
+        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+    def forward(self, x):
+        x = x + self.drop_path(self.attn(self.norm1(x)))
+        x = x + self.drop_path(self.mlp(self.norm2(x)))
+        return x
+class PatchEmbed(nn.Module):
+    """ Image to Patch Embedding
+    """
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, ratio=1):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) * (ratio ** 2)
+        self.patch_shape = (int(img_size[0] // patch_size[0] * ratio), int(img_size[1] // patch_size[1] * ratio))
+        self.origin_patch_shape = (int(img_size[0] // patch_size[0]), int(img_size[1] // patch_size[1]))
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.num_patches = num_patches
+        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=(patch_size[0] // ratio),
+                              padding=4 + 2 * (ratio // 2 - 1))
+    def forward(self, x, **kwargs):
+        B, C, H, W = x.shape
+        x = self.proj(x)
+        Hp, Wp = x.shape[2], x.shape[3]
+        x = x.flatten(2).transpose(1, 2)
+        return x, (Hp, Wp)
+class HybridEmbed(nn.Module):
+    """ CNN Feature Map Embedding
+    Extract feature map from CNN, flatten, project to embedding dim.
+    Args:
+        backbone (nn.Module): Feature extractor; its last output (`backbone(x)[-1]`) is used.
+        img_size: Input size, expanded to a 2-tuple; only used for the probe pass below.
+        feature_size: Spatial size of the last feature map. When None it is measured by
+            running the backbone once on a zero image.
+        in_chans: Number of channels of the zero probe image.
+        embed_dim: Output width of the linear projection.
+    """
+    def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768):
+        super().__init__()
+        assert isinstance(backbone, nn.Module)
+        img_size = to_2tuple(img_size)
+        self.img_size = img_size
+        self.backbone = backbone
+        if feature_size is None:
+            # Probe the backbone without tracking gradients to discover the feature map
+            # size and channel count.
+            with torch.no_grad():
+                # Remember the current mode so it can be restored after the probe.
+                training = backbone.training
+                if training:
+                    backbone.eval()
+                o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1]
+                feature_size = o.shape[-2:]
+                feature_dim = o.shape[1]
+                backbone.train(training)
+        else:
+            feature_size = to_2tuple(feature_size)
+            # Channel count is read from the backbone's `feature_info` metadata in this branch.
+            feature_dim = self.backbone.feature_info.channels()[-1]
+        self.num_patches = feature_size[0] * feature_size[1]
+        self.proj = nn.Linear(feature_dim, embed_dim)
+    def forward(self, x):
+        """Return the projected tokens of the backbone's last feature map."""
+        x = self.backbone(x)[-1]
+        # Flatten the spatial dimensions and move channels last before projecting.
+        x = x.flatten(2).transpose(1, 2)
+        x = self.proj(x)
+        return x
+class ViT(nn.Module):
+    def __init__(self,
+                 img_size=224, patch_size=16, in_chans=3, num_classes=80, embed_dim=768, depth=12,
+                 num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0.,
+                 drop_path_rate=0., hybrid_backbone=None, norm_layer=None, use_checkpoint=False,
+                 frozen_stages=-1, ratio=1, last_norm=True, use_cls=False,
+                 patch_padding='pad', freeze_attn=False, freeze_ffn=False,
+                 ):
+        # Protect mutable default arguments
+        super(ViT, self).__init__()
+        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
+        self.frozen_stages = frozen_stages
+        self.use_checkpoint = use_checkpoint
+        self.patch_padding = patch_padding
+        self.freeze_attn = freeze_attn
+        self.freeze_ffn = freeze_ffn
+        self.depth = depth
+        if hybrid_backbone is not None:
+            self.patch_embed = HybridEmbed(
+                hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim)
+        else:
+            self.patch_embed = PatchEmbed(
+                img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, ratio=ratio)
+        num_patches = self.patch_embed.num_patches
+        # since the pretraining model has class token
+        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
+        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]  # stochastic depth decay rule
+        self.blocks = nn.ModuleList([
+            Block(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
+            )
+            for i in range(depth)])
+        self.last_norm = norm_layer(embed_dim) if last_norm else nn.Identity()
+        if self.pos_embed is not None:
+            trunc_normal_(self.pos_embed, std=.02)
+        self.use_cls = use_cls
+        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
+        nn.init.normal_(self.cls_token, std=1e-6)
+        self._freeze_stages()
+    def _freeze_stages(self):
+        """Freeze parameters."""
+        if self.frozen_stages >= 0:
+            self.patch_embed.eval()
+            for param in self.patch_embed.parameters():
+                param.requires_grad = False
+        for i in range(1, self.frozen_stages + 1):
+            m = self.blocks[i]
+            m.eval()
+            for param in m.parameters():
+                param.requires_grad = False
+        if self.freeze_attn:
+            for i in range(0, self.depth):
+                m = self.blocks[i]
+                m.attn.eval()
+                m.norm1.eval()
+                for param in m.attn.parameters():
+                    param.requires_grad = False
+                for param in m.norm1.parameters():
+                    param.requires_grad = False
+        if self.freeze_ffn:
+            self.pos_embed.requires_grad = False
+            self.patch_embed.eval()
+            for param in self.patch_embed.parameters():
+                param.requires_grad = False
+            for i in range(0, self.depth):
+                m = self.blocks[i]
+                m.mlp.eval()
+                m.norm2.eval()
+                for param in m.mlp.parameters():
+                    param.requires_grad = False
+                for param in m.norm2.parameters():
+                    param.requires_grad = False
+    def init_weights(self):
+        """Initialize the weights in backbone.
+        Args:
+            pretrained (str, optional): Path to pre-trained weights.
+                Defaults to None.
+        """
+        def _init_weights(m):
+            if isinstance(m, nn.Linear):
+                trunc_normal_(m.weight, std=.02)
+                if isinstance(m, nn.Linear) and m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.LayerNorm):
+                nn.init.constant_(m.bias, 0)
+                nn.init.constant_(m.weight, 1.0)
+        self.apply(_init_weights)
+    def get_num_layers(self):
+        return len(self.blocks)
+    @torch.jit.ignore
+    def no_weight_decay(self):
+        return {'pos_embed', 'cls_token'}
+    def forward_features(self, x):
+        B, C, H, W = x.shape
+        x, (Hp, Wp) = self.patch_embed(x)
+        if self.pos_embed is not None:
+            # fit for multiple GPU training
+            # since the first element for pos embed (sin-cos manner) is zero, it will cause no difference
+            x = x + self.pos_embed[:, 1:] + self.pos_embed[:, :1]
+        x = torch.cat((self.cls_token.expand(B, -1, -1), x), dim=1) if self.use_cls else x
+        for blk in self.blocks:
+            if self.use_checkpoint:
+                x = checkpoint.checkpoint(blk, x)
+            else:
+                x = blk(x)
+        x = self.last_norm(x)
+        cls = x[:, 0] if self.use_cls else None
+        x = x[:, 1:] if self.use_cls else x
+        xp = x.permute(0, 2, 1).reshape(B, -1, Hp, Wp).contiguous()
+        return xp, cls # shape [B, D, Hp, Wp], [B, D]
+    def forward(self, x):
+        x, cls = self.forward_features(x)
+        return x, cls
+    def train(self, mode=True):
+        """Convert the model into training mode."""
+        super().train(mode)
+        self._freeze_stages()

prima/models/bioclip_embedding.py ADDED Viewed

	@@ -0,0 +1,70 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+"""
+bioclip Embedding Module
+Converts image batch to embeddings that can be concatenated with image features
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class BioClipEmbedding(nn.Module):
+    """
+    Embeds images into a feature space using BioClip model that can be combined with image features.
+    Args:
+        embed_dim: Output embedding dimension, should match the dimension of image features for concatenation
+    """
+    def __init__(self, cfg, embed_dim: int = 1024):
+        super().__init__()
+        self.embed_dim = embed_dim
+        import open_clip
+        if cfg.MODEL.BIOCLIP_EMBEDDING.TYPE == 'bioclip2':
+            print("[BioClipEmbedding] Using BioClip2 model from Hugging Face Hub")
+            self.species_model, _,_ = open_clip.create_model_and_transforms('hf-hub:imageomics/bioclip-2')
+        else:
+            self.species_model, _,_ = open_clip.create_model_and_transforms('hf-hub:imageomics/bioclip')
+        # tokenizer = open_clip.get_tokenizer('hf-hub:imageomics/bioclip')
+        self.species_model.eval()
+        # Get the output dimension from the model
+        bioclip_output_dim = self.species_model.visual.output_dim
+        # Project to target dimension
+        self.projection = nn.Sequential(
+            nn.Linear(bioclip_output_dim, embed_dim),
+            nn.LayerNorm(embed_dim),
+        )
+    def forward(self, images: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            images: Tensor of shape (B, C, H, W) representing a batch of images
+        Returns:
+            Tensor of shape (B, embed_dim) representing the embedded features
+        """
+        # BioClip expects 224x224 input, resize if needed
+        if images.shape[-2:] != (224, 224):
+            images_resized = F.interpolate(images, size=(224, 224), mode='bilinear', align_corners=False)
+        else:
+            images_resized = images
+        with torch.no_grad():
+            image_features = self.species_model.encode_image(images_resized)
+        projected_features = self.projection(image_features)
+        return projected_features

prima/models/components/__init__.py ADDED Viewed

File without changes

prima/models/components/model_utils.py ADDED Viewed

	@@ -0,0 +1,160 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+import copy
+from typing import Tuple
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+def select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num):
+    """
+    Select up to `max_cond_frame_num` conditioning frames from `cond_frame_outputs`
+    that are temporally closest to the current frame at `frame_idx`. Here, we take
+    - a) the closest conditioning frame before `frame_idx` (if any);
+    - b) the closest conditioning frame after `frame_idx` (if any);
+    - c) any other temporally closest conditioning frames until reaching a total
+         of `max_cond_frame_num` conditioning frames.
+    Outputs:
+    - selected_outputs: selected items (keys & values) from `cond_frame_outputs`.
+    - unselected_outputs: items (keys & values) not selected in `cond_frame_outputs`.
+    """
+    if max_cond_frame_num == -1 or len(cond_frame_outputs) <= max_cond_frame_num:
+        selected_outputs = cond_frame_outputs
+        unselected_outputs = {}
+    else:
+        assert max_cond_frame_num >= 2, "we should allow using 2+ conditioning frames"
+        selected_outputs = {}
+        # the closest conditioning frame before `frame_idx` (if any)
+        idx_before = max((t for t in cond_frame_outputs if t < frame_idx), default=None)
+        if idx_before is not None:
+            selected_outputs[idx_before] = cond_frame_outputs[idx_before]
+        # the closest conditioning frame after `frame_idx` (if any)
+        idx_after = min((t for t in cond_frame_outputs if t >= frame_idx), default=None)
+        if idx_after is not None:
+            selected_outputs[idx_after] = cond_frame_outputs[idx_after]
+        # add other temporally closest conditioning frames until reaching a total
+        # of `max_cond_frame_num` conditioning frames.
+        num_remain = max_cond_frame_num - len(selected_outputs)
+        inds_remain = sorted(
+            (t for t in cond_frame_outputs if t not in selected_outputs),
+            key=lambda x: abs(x - frame_idx),
+        )[:num_remain]
+        selected_outputs.update((t, cond_frame_outputs[t]) for t in inds_remain)
+        unselected_outputs = {
+            t: v for t, v in cond_frame_outputs.items() if t not in selected_outputs
+        }
+    return selected_outputs, unselected_outputs
+def get_1d_sine_pe(pos_inds, dim, temperature=10000):
+    """
+    Get 1D sine positional embedding as in the original Transformer paper.
+    """
+    pe_dim = dim // 2
+    dim_t = torch.arange(pe_dim, dtype=torch.float32, device=pos_inds.device)
+    dim_t = temperature ** (2 * (dim_t // 2) / pe_dim)
+    pos_embed = pos_inds.unsqueeze(-1) / dim_t
+    pos_embed = torch.cat([pos_embed.sin(), pos_embed.cos()], dim=-1)
+    return pos_embed
+def get_activation_fn(activation):
+    """Return an activation function given a string"""
+    if activation == "relu":
+        return F.relu
+    if activation == "gelu":
+        return F.gelu
+    if activation == "glu":
+        return F.glu
+    raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
+def get_clones(module, N):
+    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
+class DropPath(nn.Module):
+    # adapted from https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/drop.py
+    def __init__(self, drop_prob=0.0, scale_by_keep=True):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+        self.scale_by_keep = scale_by_keep
+    def forward(self, x):
+        if self.drop_prob == 0.0 or not self.training:
+            return x
+        keep_prob = 1 - self.drop_prob
+        shape = (x.shape[0],) + (1,) * (x.ndim - 1)
+        random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
+        if keep_prob > 0.0 and self.scale_by_keep:
+            random_tensor.div_(keep_prob)
+        return x * random_tensor
+# Lightly adapted from
+# https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py # noqa
+class MLP(nn.Module):
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        output_dim: int,
+        num_layers: int,
+        activation: nn.Module = nn.ReLU,
+        sigmoid_output: bool = False,
+    ) -> None:
+        super().__init__()
+        self.num_layers = num_layers
+        h = [hidden_dim] * (num_layers - 1)
+        self.layers = nn.ModuleList(
+            nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])
+        )
+        self.sigmoid_output = sigmoid_output
+        self.act = activation()
+    def forward(self, x):
+        for i, layer in enumerate(self.layers):
+            x = self.act(layer(x)) if i < self.num_layers - 1 else layer(x)
+        if self.sigmoid_output:
+            x = F.sigmoid(x)
+        return x
+# From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa
+# Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119  # noqa
+class LayerNorm2d(nn.Module):
+    def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(num_channels))
+        self.bias = nn.Parameter(torch.zeros(num_channels))
+        self.eps = eps
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        u = x.mean(1, keepdim=True)
+        s = (x - u).pow(2).mean(1, keepdim=True)
+        x = (x - u) / torch.sqrt(s + self.eps)
+        x = self.weight[:, None, None] * x + self.bias[:, None, None]
+        return x

prima/models/components/pose_transformer.py ADDED Viewed

	@@ -0,0 +1,366 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+from inspect import isfunction
+from typing import Callable, Optional
+import torch
+from einops import rearrange
+from einops.layers.torch import Rearrange
+from torch import nn
+from .t_cond_mlp import (
+    AdaptiveLayerNorm1D,
+    FrequencyEmbedder,
+    normalization_layer,
+)
+def exists(val):
+    return val is not None
+def default(val, d):
+    if exists(val):
+        return val
+    return d() if isfunction(d) else d
+class PreNorm(nn.Module):
+    def __init__(self, dim: int, fn: Callable, norm: str = "layer", norm_cond_dim: int = -1):
+        super().__init__()
+        self.norm = normalization_layer(norm, dim, norm_cond_dim)
+        self.fn = fn
+    def forward(self, x: torch.Tensor, *args, **kwargs):
+        if isinstance(self.norm, AdaptiveLayerNorm1D):
+            return self.fn(self.norm(x, *args), **kwargs)
+        else:
+            return self.fn(self.norm(x), **kwargs)
+class FeedForward(nn.Module):
+    def __init__(self, dim, hidden_dim, dropout=0.0):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(dim, hidden_dim),
+            nn.GELU(),
+            nn.Dropout(dropout),
+            nn.Linear(hidden_dim, dim),
+            nn.Dropout(dropout),
+        )
+    def forward(self, x):
+        return self.net(x)
+class Attention(nn.Module):
+    """Multi-head scaled dot-product self-attention.
+
+    Args:
+        dim: Feature size of the input tokens.
+        heads: Number of attention heads.
+        dim_head: Feature size of each head.
+        dropout: Dropout probability applied to the attention weights and,
+            when an output projection exists, to the projected output.
+    """
+    def __init__(self, dim, heads=8, dim_head=64, dropout=0.0):
+        super().__init__()
+        inner_dim = heads * dim_head
+        self.heads = heads
+        self.scale = dim_head**-0.5
+        self.attend = nn.Softmax(dim=-1)
+        self.dropout = nn.Dropout(dropout)
+        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
+        # A single head whose width already equals ``dim`` needs no projection.
+        if heads == 1 and dim_head == dim:
+            self.to_out = nn.Identity()
+        else:
+            self.to_out = nn.Sequential(nn.Linear(inner_dim, dim), nn.Dropout(dropout))
+    def forward(self, x):
+        def split_heads(t):
+            return rearrange(t, "b n (h d) -> b h n d", h=self.heads)
+        q, k, v = (split_heads(part) for part in self.to_qkv(x).chunk(3, dim=-1))
+        scores = torch.matmul(q, k.transpose(-1, -2)) * self.scale
+        weights = self.dropout(self.attend(scores))
+        merged = rearrange(torch.matmul(weights, v), "b h n d -> b n (h d)")
+        return self.to_out(merged)
+class CrossAttention(nn.Module):
+    """Multi-head attention with queries from ``x`` and keys/values from ``context``.
+
+    Args:
+        dim: Feature size of the query tokens.
+        context_dim: Feature size of the context tokens; ``dim`` when ``None``.
+        heads: Number of attention heads.
+        dim_head: Feature size of each head.
+        dropout: Dropout probability applied to the attention weights and,
+            when an output projection exists, to the projected output.
+    """
+    def __init__(self, dim, context_dim=None, heads=8, dim_head=64, dropout=0.0):
+        super().__init__()
+        inner_dim = heads * dim_head
+        self.heads = heads
+        self.scale = dim_head**-0.5
+        self.attend = nn.Softmax(dim=-1)
+        self.dropout = nn.Dropout(dropout)
+        kv_in_dim = default(context_dim, dim)
+        self.to_kv = nn.Linear(kv_in_dim, inner_dim * 2, bias=False)
+        self.to_q = nn.Linear(dim, inner_dim, bias=False)
+        # A single head whose width already equals ``dim`` needs no projection.
+        if heads == 1 and dim_head == dim:
+            self.to_out = nn.Identity()
+        else:
+            self.to_out = nn.Sequential(nn.Linear(inner_dim, dim), nn.Dropout(dropout))
+    def forward(self, x, context=None):
+        # Without a context the layer attends over ``x`` itself.
+        source = default(context, x)
+        k, v = self.to_kv(source).chunk(2, dim=-1)
+        q = self.to_q(x)
+        def split_heads(t):
+            return rearrange(t, "b n (h d) -> b h n d", h=self.heads)
+        q, k, v = (split_heads(t) for t in (q, k, v))
+        scores = torch.matmul(q, k.transpose(-1, -2)) * self.scale
+        weights = self.dropout(self.attend(scores))
+        merged = rearrange(torch.matmul(weights, v), "b h n d -> b n (h d)")
+        return self.to_out(merged)
+class Transformer(nn.Module):
+    """Stack of pre-norm self-attention + feed-forward blocks with residuals.
+
+    Args:
+        dim: Token feature size.
+        depth: Number of blocks.
+        heads: Attention heads per block.
+        dim_head: Feature size of each head.
+        mlp_dim: Hidden width of each feed-forward layer.
+        dropout: Dropout probability for attention and feed-forward layers.
+        norm: Normalization kind used by every ``PreNorm`` wrapper.
+        norm_cond_dim: Conditioning size used by every ``PreNorm`` wrapper.
+    """
+    def __init__(
+        self,
+        dim: int,
+        depth: int,
+        heads: int,
+        dim_head: int,
+        mlp_dim: int,
+        dropout: float = 0.0,
+        norm: str = "layer",
+        norm_cond_dim: int = -1,
+    ):
+        super().__init__()
+        self.layers = nn.ModuleList([])
+        norm_kwargs = {"norm": norm, "norm_cond_dim": norm_cond_dim}
+        for _ in range(depth):
+            attention = Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)
+            feed_forward = FeedForward(dim, mlp_dim, dropout=dropout)
+            block = nn.ModuleList(
+                [
+                    PreNorm(dim, attention, **norm_kwargs),
+                    PreNorm(dim, feed_forward, **norm_kwargs),
+                ]
+            )
+            self.layers.append(block)
+    def forward(self, x: torch.Tensor, *args):
+        # ``args`` are handed to every PreNorm wrapper unchanged.
+        for attention, feed_forward in self.layers:
+            x = attention(x, *args) + x
+            x = feed_forward(x, *args) + x
+        return x
+class TransformerCrossAttn(nn.Module):
+    """Stack of pre-norm self-attention, cross-attention and feed-forward blocks.
+
+    Args:
+        dim: Token feature size.
+        depth: Number of blocks.
+        heads: Attention heads per block.
+        dim_head: Feature size of each head.
+        mlp_dim: Hidden width of each feed-forward layer.
+        dropout: Dropout probability for attention and feed-forward layers.
+        norm: Normalization kind used by every ``PreNorm`` wrapper.
+        norm_cond_dim: Conditioning size used by every ``PreNorm`` wrapper.
+        context_dim: Feature size of the cross-attention context.
+    """
+    def __init__(
+        self,
+        dim: int,
+        depth: int,
+        heads: int,
+        dim_head: int,
+        mlp_dim: int,
+        dropout: float = 0.0,
+        norm: str = "layer",
+        norm_cond_dim: int = -1,
+        context_dim: Optional[int] = None,
+    ):
+        super().__init__()
+        self.layers = nn.ModuleList([])
+        norm_kwargs = {"norm": norm, "norm_cond_dim": norm_cond_dim}
+        for _ in range(depth):
+            self_attention = Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)
+            cross_attention = CrossAttention(
+                dim, context_dim=context_dim, heads=heads, dim_head=dim_head, dropout=dropout
+            )
+            feed_forward = FeedForward(dim, mlp_dim, dropout=dropout)
+            block = nn.ModuleList(
+                [
+                    PreNorm(dim, self_attention, **norm_kwargs),
+                    PreNorm(dim, cross_attention, **norm_kwargs),
+                    PreNorm(dim, feed_forward, **norm_kwargs),
+                ]
+            )
+            self.layers.append(block)
+    def forward(self, x: torch.Tensor, *args, context=None, context_list=None):
+        """Run every block; ``context_list`` supplies one context per block.
+
+        When ``context_list`` is ``None`` the single ``context`` is reused for
+        all blocks. Raises ``ValueError`` if the list length differs from the
+        number of blocks.
+        """
+        if context_list is None:
+            context_list = [context for _ in self.layers]
+        if len(context_list) != len(self.layers):
+            raise ValueError(f"len(context_list) != len(self.layers) ({len(context_list)} != {len(self.layers)})")
+        for idx, block in enumerate(self.layers):
+            self_attention, cross_attention, feed_forward = block
+            x = self_attention(x, *args) + x
+            x = cross_attention(x, *args, context=context_list[idx]) + x
+            x = feed_forward(x, *args) + x
+        return x
+class DropTokenDropout(nn.Module):
+    """Randomly remove whole tokens from the sequence during training.
+
+    One Bernoulli mask over the sequence positions is sampled per call and
+    shared by every batch element, so all samples keep the same positions.
+
+    Args:
+        p: Probability of dropping each token; must lie in [0, 1].
+
+    Raises:
+        ValueError: If ``p`` is below 0 or above 1.
+    """
+    def __init__(self, p: float = 0.1):
+        super().__init__()
+        if p > 1 or p < 0:
+            raise ValueError(
+                "dropout probability has to be between 0 and 1, but got {}".format(p)
+            )
+        self.p = p
+    def forward(self, x: torch.Tensor):
+        # x: (batch_size, seq_len, dim)
+        if not self.training or not (self.p > 0):
+            return x
+        # The mask is shaped like one sample's sequence axis: (seq_len,).
+        drop_prob = torch.full_like(x[0, :, 0], self.p)
+        dropped = drop_prob.bernoulli().bool()
+        if dropped.any():
+            x = x[:, ~dropped, :]
+        return x
+class ZeroTokenDropout(nn.Module):
+    """Randomly zero out whole tokens during training.
+
+    An independent Bernoulli mask is sampled for every (batch, position)
+    pair; masked tokens have all of their features set to zero. The input
+    tensor itself is never modified.
+
+    Args:
+        p: Probability of zeroing each token; must lie in [0, 1].
+
+    Raises:
+        ValueError: If ``p`` is below 0 or above 1.
+    """
+    def __init__(self, p: float = 0.1):
+        super().__init__()
+        if p < 0 or p > 1:
+            raise ValueError(f"dropout probability has to be between 0 and 1, but got {p}")
+        self.p = p
+    def forward(self, x: torch.Tensor):
+        # x: (batch_size, seq_len, dim)
+        if self.training and self.p > 0:
+            zero_mask = torch.full_like(x[:, :, 0], self.p).bernoulli().bool()
+            # Out-of-place fill: assigning into ``x`` would silently change
+            # the caller's tensor and fails on leaf tensors that require grad.
+            x = x.masked_fill(zero_mask.unsqueeze(-1), 0)
+        return x
+class TransformerEncoder(nn.Module):
+    """Embed input tokens, add learned positions and run a ``Transformer``.
+
+    Args:
+        num_tokens: Number of learned positional embeddings.
+        token_dim: Feature size of the raw input tokens.
+        dim: Transformer feature size.
+        depth: Number of transformer blocks.
+        heads: Attention heads per block.
+        mlp_dim: Hidden width of each feed-forward layer.
+        dim_head: Feature size of each head.
+        dropout: Dropout probability inside the transformer.
+        emb_dropout: Probability used by the token dropout layer.
+        emb_dropout_type: ``"drop"`` removes tokens, ``"zero"`` zeroes them.
+        emb_dropout_loc: Where token dropout runs: ``"input"``, ``"token"``
+            or ``"token_afterpos"``. Any other value applies no dropout.
+        norm: Normalization kind for the transformer.
+        norm_cond_dim: Conditioning size for the transformer.
+        token_pe_numfreq: When positive, tokens pass through a
+            ``FrequencyEmbedder`` before the linear embedding.
+
+    Raises:
+        ValueError: If ``emb_dropout_type`` is not ``"drop"`` or ``"zero"``.
+    """
+    def __init__(
+        self,
+        num_tokens: int,
+        token_dim: int,
+        dim: int,
+        depth: int,
+        heads: int,
+        mlp_dim: int,
+        dim_head: int = 64,
+        dropout: float = 0.0,
+        emb_dropout: float = 0.0,
+        emb_dropout_type: str = "drop",
+        emb_dropout_loc: str = "token",
+        norm: str = "layer",
+        norm_cond_dim: int = -1,
+        token_pe_numfreq: int = -1,
+    ):
+        super().__init__()
+        if token_pe_numfreq > 0:
+            # Each raw feature expands to sin/cos per frequency plus itself.
+            embedded_dim = token_dim * (2 * token_pe_numfreq + 1)
+            stages = [
+                Rearrange("b n d -> (b n) d", n=num_tokens, d=token_dim),
+                FrequencyEmbedder(token_pe_numfreq, token_pe_numfreq - 1),
+                Rearrange("(b n) d -> b n d", n=num_tokens, d=embedded_dim),
+                nn.Linear(embedded_dim, dim),
+            ]
+            self.to_token_embedding = nn.Sequential(*stages)
+        else:
+            self.to_token_embedding = nn.Linear(token_dim, dim)
+        self.pos_embedding = nn.Parameter(torch.randn(1, num_tokens, dim))
+        if emb_dropout_type not in ("drop", "zero"):
+            raise ValueError(f"Unknown emb_dropout_type: {emb_dropout_type}")
+        dropout_cls = DropTokenDropout if emb_dropout_type == "drop" else ZeroTokenDropout
+        self.dropout = dropout_cls(emb_dropout)
+        self.emb_dropout_loc = emb_dropout_loc
+        self.transformer = Transformer(
+            dim, depth, heads, dim_head, mlp_dim, dropout, norm=norm, norm_cond_dim=norm_cond_dim
+        )
+    def forward(self, inp: torch.Tensor, *args, **kwargs):
+        # ``kwargs`` are accepted but not used; ``args`` go to the transformer.
+        where = self.emb_dropout_loc
+        x = self.dropout(inp) if where == "input" else inp
+        x = self.to_token_embedding(x)
+        if where == "token":
+            x = self.dropout(x)
+        # The token count is read after dropout, so the positional slice
+        # matches the tokens that are still present.
+        _, n, _ = x.shape
+        x += self.pos_embedding[:, :n]
+        if where == "token_afterpos":
+            x = self.dropout(x)
+        return self.transformer(x, *args)
+class TransformerDecoder(nn.Module):
+    """Embed query tokens, add learned positions and run a ``TransformerCrossAttn``.
+
+    Args:
+        num_tokens: Number of learned positional embeddings.
+        token_dim: Feature size of the raw input tokens.
+        dim: Transformer feature size.
+        depth: Number of transformer blocks.
+        heads: Attention heads per block.
+        mlp_dim: Hidden width of each feed-forward layer.
+        dim_head: Feature size of each head.
+        dropout: Dropout probability inside the transformer.
+        emb_dropout: Probability used by the embedding dropout layer.
+        emb_dropout_type: ``"drop"`` removes tokens, ``"zero"`` zeroes them,
+            ``"normal"`` uses ``nn.Dropout``.
+        norm: Normalization kind for the transformer.
+        norm_cond_dim: Conditioning size for the transformer.
+        context_dim: Feature size of the cross-attention context.
+        skip_token_embedding: Use the input tokens directly instead of a
+            linear embedding; requires ``token_dim == dim``.
+
+    Raises:
+        ValueError: If ``skip_token_embedding`` is set while
+            ``token_dim != dim``, or if ``emb_dropout_type`` is unknown.
+    """
+    def __init__(
+        self,
+        num_tokens: int,
+        token_dim: int,
+        dim: int,
+        depth: int,
+        heads: int,
+        mlp_dim: int,
+        dim_head: int = 64,
+        dropout: float = 0.0,
+        emb_dropout: float = 0.0,
+        emb_dropout_type: str = 'drop',
+        norm: str = "layer",
+        norm_cond_dim: int = -1,
+        context_dim: Optional[int] = None,
+        skip_token_embedding: bool = False,
+    ):
+        super().__init__()
+        if skip_token_embedding:
+            if token_dim != dim:
+                raise ValueError(
+                    f"token_dim ({token_dim}) != dim ({dim}) when skip_token_embedding is True"
+                )
+            self.to_token_embedding = nn.Identity()
+        else:
+            self.to_token_embedding = nn.Linear(token_dim, dim)
+        self.pos_embedding = nn.Parameter(torch.randn(1, num_tokens, dim))
+        if emb_dropout_type == "drop":
+            self.dropout = DropTokenDropout(emb_dropout)
+        elif emb_dropout_type == "zero":
+            self.dropout = ZeroTokenDropout(emb_dropout)
+        elif emb_dropout_type == "normal":
+            self.dropout = nn.Dropout(emb_dropout)
+        else:
+            # Fail at construction instead of with a late AttributeError on
+            # the first forward pass; mirrors TransformerEncoder.
+            raise ValueError(f"Unknown emb_dropout_type: {emb_dropout_type}")
+        self.transformer = TransformerCrossAttn(
+            dim,
+            depth,
+            heads,
+            dim_head,
+            mlp_dim,
+            dropout,
+            norm=norm,
+            norm_cond_dim=norm_cond_dim,
+            context_dim=context_dim,
+        )
+    def forward(self, inp: torch.Tensor, *args, context=None, context_list=None):
+        """Decode ``inp`` against ``context`` (or one context per block)."""
+        x = self.to_token_embedding(inp)
+        x = self.dropout(x)
+        # Read the token count after dropout: the "drop" variant can shorten
+        # the sequence, and the positional slice must match what is left.
+        _, n, _ = x.shape
+        # Out-of-place add: with an identity embedding and inactive dropout
+        # ``x`` is the caller's own tensor, which must not be modified.
+        x = x + self.pos_embedding[:, :n]
+        x = self.transformer(x, *args, context=context, context_list=context_list)
+        return x

prima/models/components/position_encoding.py ADDED Viewed

	@@ -0,0 +1,84 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+import math
+from typing import Any, Optional, Tuple
+import numpy as np
+import torch
+from torch import nn
+# Rotary Positional Encoding, adapted from:
+# 1. https://github.com/meta-llama/codellama/blob/main/llama/model.py
+# 2. https://github.com/naver-ai/rope-vit
+# 3. https://github.com/lucidrains/rotary-embedding-torch
+def init_t_xy(end_x: int, end_y: int):
+    """Return x and y coordinates for a flattened ``end_x`` by ``end_y`` grid.
+
+    Positions are enumerated with x varying fastest; both outputs are float
+    tensors of length ``end_x * end_y``.
+    """
+    flat_index = torch.arange(end_x * end_y, dtype=torch.float32)
+    x_coord = (flat_index % end_x).float()
+    y_coord = torch.div(flat_index, end_x, rounding_mode="floor").float()
+    return x_coord, y_coord
+def compute_axial_cis(dim: int, end_x: int, end_y: int, theta: float = 10000.0):
+    """Build complex rotary factors for a 2-D grid of positions.
+
+    The x and y axes share the same ``dim // 4`` base frequencies. Each
+    position's angles are its coordinate times those frequencies, turned into
+    unit-magnitude complex numbers; x factors and y factors are concatenated
+    along the last dimension.
+    """
+    exponents = torch.arange(0, dim, 4)[: (dim // 4)].float() / dim
+    base_freqs = 1.0 / (theta ** exponents)
+    t_x, t_y = init_t_xy(end_x, end_y)
+    angles_x = torch.outer(t_x, base_freqs)
+    angles_y = torch.outer(t_y, base_freqs)
+    cis_x = torch.polar(torch.ones_like(angles_x), angles_x)
+    cis_y = torch.polar(torch.ones_like(angles_y), angles_y)
+    return torch.cat([cis_x, cis_y], dim=-1)
+def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
+    """View ``freqs_cis`` so it broadcasts against ``x``.
+
+    ``freqs_cis`` must match the last two dimensions of ``x``; every leading
+    dimension of ``x`` becomes a singleton in the returned view.
+    """
+    ndim = x.ndim
+    assert 0 <= 1 < ndim
+    assert freqs_cis.shape == (x.shape[-2], x.shape[-1])
+    leading_ones = [1] * (ndim - 2)
+    trailing = list(x.shape[-2:])
+    return freqs_cis.view(*(leading_ones + trailing))
+def apply_rotary_enc(
+    xq: torch.Tensor,
+    xk: torch.Tensor,
+    freqs_cis: torch.Tensor,
+    repeat_freqs_k: bool = False,
+):
+    """Rotate query and key features by complex rotary position factors.
+
+    Consecutive pairs along the last dimension of ``xq`` and ``xk`` are viewed
+    as complex numbers and multiplied element-wise by ``freqs_cis``.
+
+    Args:
+        xq: Query tensor. The ``flatten(3)`` calls suggest a 4-D layout such
+            as (batch, heads, seq, dim) -- TODO confirm against callers.
+        xk: Key tensor; when its size along dim -2 is zero it is returned
+            unchanged.
+        freqs_cis: Complex factors; ``reshape_for_broadcast`` asserts that its
+            shape equals the last two dimensions of the complex view of ``xq``.
+        repeat_freqs_k: When True, ``freqs_cis`` is tiled along the sequence
+            dimension by ``len(k) // len(q)`` before being applied to the keys.
+
+    Returns:
+        ``(xq_out, xk_out)`` cast back to the dtype of ``xq`` and ``xk``.
+    """
+    # Pair up the last dimension as (real, imag) and view it as complex.
+    xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
+    xk_ = (
+        torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
+        if xk.shape[-2] != 0
+        else None
+    )
+    freqs_cis = reshape_for_broadcast(freqs_cis, xq_)
+    # Complex multiply performs the rotation; flatten(3) merges the
+    # (pairs, 2) axes back into a single feature axis.
+    xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
+    if xk_ is None:
+        # no keys to rotate, due to dropout
+        return xq_out.type_as(xq).to(xq.device), xk
+    # repeat freqs along seq_len dim to match k seq_len
+    if repeat_freqs_k:
+        # Integer ratio of key length to query length; presumably the key
+        # length is an exact multiple -- TODO confirm against callers.
+        r = xk_.shape[-2] // xq_.shape[-2]
+        if freqs_cis.is_cuda:
+            freqs_cis = freqs_cis.repeat(*([1] * (freqs_cis.ndim - 2)), r, 1)
+        else:
+            # torch.repeat on complex numbers may not be supported on non-CUDA devices
+            # (freqs_cis has 4 dims and we repeat on dim 2) so we use expand + flatten
+            freqs_cis = freqs_cis.unsqueeze(2).expand(-1, -1, r, -1, -1).flatten(2, 3)
+    xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
+    return xq_out.type_as(xq).to(xq.device), xk_out.type_as(xk).to(xk.device)

prima/models/components/t_cond_mlp.py ADDED Viewed

	@@ -0,0 +1,204 @@

+"""
+PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation
+Official implementation of the paper:
+"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
+by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
+Licensed under a modified MIT license
+"""
+import copy
+from typing import List, Optional
+import torch
+class AdaptiveLayerNorm1D(torch.nn.Module):
+    """LayerNorm whose output is scaled and shifted by a conditioning vector.
+
+    The conditioning projection is zero-initialised, so a freshly built layer
+    behaves exactly like its inner ``LayerNorm``.
+
+    Args:
+        data_dim: Size of the normalized (last) dimension; must be positive.
+        norm_cond_dim: Size of the conditioning vector; must be positive.
+
+    Raises:
+        ValueError: If either size is not positive.
+    """
+    def __init__(self, data_dim: int, norm_cond_dim: int):
+        super().__init__()
+        if data_dim <= 0:
+            raise ValueError(f"data_dim must be positive, but got {data_dim}")
+        if norm_cond_dim <= 0:
+            raise ValueError(f"norm_cond_dim must be positive, but got {norm_cond_dim}")
+        self.norm = torch.nn.LayerNorm(data_dim)
+        self.linear = torch.nn.Linear(norm_cond_dim, 2 * data_dim)
+        for param in (self.linear.weight, self.linear.bias):
+            torch.nn.init.zeros_(param)
+    def forward(self, x: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
+        # x: (batch, ..., data_dim)
+        # t: (batch, norm_cond_dim)
+        # return: same shape as x
+        normed = self.norm(x)
+        alpha, beta = self.linear(t).chunk(2, dim=-1)
+        middle_dims = x.dim() - 2
+        if middle_dims > 0:
+            # Insert singleton axes so (batch, data_dim) broadcasts over the
+            # intermediate dimensions of x.
+            ones = (1,) * middle_dims
+            alpha = alpha.view(alpha.shape[0], *ones, alpha.shape[1])
+            beta = beta.view(beta.shape[0], *ones, beta.shape[1])
+        return normed * (1 + alpha) + beta
+class SequentialCond(torch.nn.Sequential):
+    """``Sequential`` that passes extra arguments to conditioning-aware layers.
+
+    ``AdaptiveLayerNorm1D``, nested ``SequentialCond`` and ``ResidualMLPBlock``
+    children receive ``*args``/``**kwargs``; all other children are called
+    with the running value only.
+    """
+    def forward(self, input, *args, **kwargs):
+        conditioned_types = (AdaptiveLayerNorm1D, SequentialCond, ResidualMLPBlock)
+        value = input
+        for layer in self:
+            if isinstance(layer, conditioned_types):
+                value = layer(value, *args, **kwargs)
+                continue
+            value = layer(value)
+        return value
+def normalization_layer(norm: Optional[str], dim: int, norm_cond_dim: int = -1):
+    """Build the normalization module selected by ``norm``.
+
+    Args:
+        norm: ``"batch"``, ``"layer"``, ``"ada"`` or ``None`` (identity).
+        dim: Feature size of the normalized data.
+        norm_cond_dim: Conditioning size, used only for ``"ada"``.
+
+    Raises:
+        ValueError: If ``norm`` is not one of the supported values.
+    """
+    if norm is None:
+        return torch.nn.Identity()
+    if norm == "batch":
+        return torch.nn.BatchNorm1d(dim)
+    if norm == "layer":
+        return torch.nn.LayerNorm(dim)
+    if norm == "ada":
+        assert norm_cond_dim > 0, f"norm_cond_dim must be positive, got {norm_cond_dim}"
+        return AdaptiveLayerNorm1D(dim, norm_cond_dim)
+    raise ValueError(f"Unknown norm: {norm}")
+def linear_norm_activ_dropout(
+    input_dim: int,
+    output_dim: int,
+    activation: torch.nn.Module = torch.nn.ReLU(),
+    bias: bool = True,
+    norm: Optional[str] = "layer",  # Options: ada/batch/layer
+    dropout: float = 0.0,
+    norm_cond_dim: int = -1,
+) -> SequentialCond:
+    """Build Linear -> [norm] -> activation -> [dropout] as a ``SequentialCond``.
+
+    The norm layer is omitted when ``norm`` is ``None`` and the dropout layer
+    when ``dropout`` is not positive. ``activation`` is deep-copied, so the
+    passed-in module instance is not shared between stages.
+    """
+    stage = [torch.nn.Linear(input_dim, output_dim, bias=bias)]
+    if norm is not None:
+        stage.append(normalization_layer(norm, output_dim, norm_cond_dim))
+    stage.append(copy.deepcopy(activation))
+    if dropout > 0.0:
+        stage.append(torch.nn.Dropout(dropout))
+    return SequentialCond(*stage)
+def create_simple_mlp(
+    input_dim: int,
+    hidden_dims: List[int],
+    output_dim: int,
+    activation: torch.nn.Module = torch.nn.ReLU(),
+    bias: bool = True,
+    norm: Optional[str] = "layer",  # Options: ada/batch/layer
+    dropout: float = 0.0,
+    norm_cond_dim: int = -1,
+) -> SequentialCond:
+    """Build an MLP with one hidden stage per entry of ``hidden_dims``.
+
+    Every hidden stage comes from ``linear_norm_activ_dropout`` and its layers
+    are flattened into the result; a final plain ``Linear`` maps to
+    ``output_dim``.
+    """
+    widths = [input_dim, *hidden_dims]
+    modules = []
+    for in_dim, out_dim in zip(widths, widths[1:]):
+        stage = linear_norm_activ_dropout(
+            in_dim, out_dim, activation, bias, norm, dropout, norm_cond_dim
+        )
+        # Extending with the stage adds its child layers individually.
+        modules.extend(stage)
+    modules.append(torch.nn.Linear(widths[-1], output_dim, bias=bias))
+    return SequentialCond(*modules)
+class ResidualMLPBlock(torch.nn.Module):
+    """Residual block: ``x + MLP(x)`` with equal input/hidden/output widths.
+
+    Args:
+        input_dim: Feature size of the input.
+        hidden_dim: Width of every hidden stage; must equal ``input_dim``.
+        num_hidden_layers: Number of ``linear_norm_activ_dropout`` stages.
+        output_dim: Feature size of the output; must equal ``input_dim``.
+        activation: Activation module copied into every stage.
+        bias: Whether the linear layers use a bias.
+        norm: Normalization kind for every stage.
+        dropout: Dropout probability for every stage.
+        norm_cond_dim: Conditioning size for adaptive normalization.
+
+    Raises:
+        NotImplementedError: If the three widths are not all equal.
+    """
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        num_hidden_layers: int,
+        output_dim: int,
+        activation: torch.nn.Module = torch.nn.ReLU(),
+        bias: bool = True,
+        norm: Optional[str] = "layer",  # Options: ada/batch/layer
+        dropout: float = 0.0,
+        norm_cond_dim: int = -1,
+    ):
+        super().__init__()
+        if not (input_dim == output_dim == hidden_dim):
+            # Name all three widths: the check also fails when only
+            # hidden_dim differs from the other two.
+            raise NotImplementedError(
+                f"input_dim ({input_dim}), hidden_dim ({hidden_dim}) and "
+                f"output_dim ({output_dim}) must all be equal; "
+                "other configurations are not implemented"
+            )
+        # All widths are equal here, so every stage maps hidden_dim -> hidden_dim.
+        layers = [
+            linear_norm_activ_dropout(
+                hidden_dim, hidden_dim, activation, bias, norm, dropout, norm_cond_dim
+            )
+            for _ in range(num_hidden_layers)
+        ]
+        self.model = SequentialCond(*layers)
+        self.skip = torch.nn.Identity()
+    def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+        return x + self.model(x, *args, **kwargs)
+class ResidualMLP(torch.nn.Module):
+    """Input stage, ``num_blocks`` residual blocks, then a linear output layer.
+
+    Args:
+        input_dim: Feature size of the input.
+        hidden_dim: Width used by the input stage and every residual block.
+        num_hidden_layers: Stages inside each residual block.
+        output_dim: Feature size of the output.
+        activation: Activation module copied into every stage.
+        bias: Whether the linear layers use a bias.
+        norm: Normalization kind for every stage.
+        dropout: Dropout probability for every stage.
+        num_blocks: Number of residual blocks.
+        norm_cond_dim: Conditioning size for adaptive normalization.
+    """
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        num_hidden_layers: int,
+        output_dim: int,
+        activation: torch.nn.Module = torch.nn.ReLU(),
+        bias: bool = True,
+        norm: Optional[str] = "layer",  # Options: ada/batch/layer
+        dropout: float = 0.0,
+        num_blocks: int = 1,
+        norm_cond_dim: int = -1,
+    ):
+        super().__init__()
+        self.input_dim = input_dim
+        stem = linear_norm_activ_dropout(
+            input_dim, hidden_dim, activation, bias, norm, dropout, norm_cond_dim
+        )
+        blocks = []
+        for _ in range(num_blocks):
+            blocks.append(
+                ResidualMLPBlock(
+                    hidden_dim,
+                    hidden_dim,
+                    num_hidden_layers,
+                    hidden_dim,
+                    activation,
+                    bias,
+                    norm,
+                    dropout,
+                    norm_cond_dim,
+                )
+            )
+        head = torch.nn.Linear(hidden_dim, output_dim, bias=bias)
+        self.model = SequentialCond(stem, *blocks, head)
+    def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+        # Extra arguments reach the conditioning-aware layers via SequentialCond.
+        out = self.model(x, *args, **kwargs)
+        return out
+class FrequencyEmbedder(torch.nn.Module):
+    """Sinusoidal feature embedding at power-of-two frequencies.
+
+    Each input feature is expanded to its sines and cosines at
+    ``num_frequencies`` frequencies, followed by the raw value itself.
+
+    Args:
+        num_frequencies: Number of frequencies.
+        max_freq_log2: Base-2 logarithm of the highest frequency; exponents
+            are spaced linearly from 0 to this value.
+    """
+    def __init__(self, num_frequencies, max_freq_log2):
+        super().__init__()
+        exponents = torch.linspace(0, max_freq_log2, steps=num_frequencies)
+        self.register_buffer("frequencies", 2 ** exponents)
+    def forward(self, x):
+        # x should be of size (N,) or (N, D)
+        num_rows = x.size(0)
+        if x.dim() == 1:  # (N,)
+            x = x.unsqueeze(1)  # (N, D) where D=1
+        raw = x.unsqueeze(-1)  # (N, D, 1)
+        phases = self.frequencies.view(1, 1, -1) * raw  # (N, D, num_frequencies)
+        parts = [torch.sin(phases), torch.cos(phases), raw]
+        # Per feature the layout is [sines..., cosines..., raw value].
+        stacked = torch.cat(parts, dim=-1)
+        return stacked.view(num_rows, -1)  # (N, D * 2 * num_frequencies + D)