Upload folder using huggingface_hub
- configs/callbacks/vis/vis_unity_val.yaml +3 -0
- genmo/callbacks/vis/vis_unity_val.py +64 -19
- genmo/utils/vis/renderer.py +7 -2
- scripts/train.py +17 -1
- third_party/GVHMR/hmr4d/utils/vis/renderer.py +7 -2
- third_party/GVHMR/process_data.sh +1 -1
- third_party/GVHMR/tools/demo/process_dataset.py +1090 -209
- train.log +632 -0
- train.sh +9 -1
configs/callbacks/vis/vis_unity_val.yaml
@@ -3,6 +3,9 @@ vis_unity_val:
  enabled: false
  every_n_epochs: 1
  num_batches: 1
+ # Which val batches to render: "first" or "random".
+ batch_select: "first"
+ batch_select_seed: 123
  num_frames: 30
  render_incam: true
  render_global: true
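For illustration, a standalone sketch of what the two policies select, mirroring the callback logic added in genmo/callbacks/vis/vis_unity_val.py below (select_batches is a hypothetical helper, not repo code):

import numpy as np

def select_batches(n, k, epoch, dl_idx, seed=123, policy="first"):
    # "first" keeps the legacy behavior; "random" draws a deterministic,
    # per-(epoch, dataloader) subset so reruns render the same batches.
    k = min(k, n)
    if policy != "random":
        return set(range(k))
    rng = np.random.default_rng(seed + epoch * 1000 + dl_idx)
    return set(int(i) for i in rng.choice(n, size=k, replace=False))

assert select_batches(8, 2, 0, 0, policy="random") == select_batches(8, 2, 0, 0, policy="random")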
genmo/callbacks/vis/vis_unity_val.py
@@ -31,6 +31,8 @@ class VisUnityVal(pl.Callback):
        num_frames: int = 30,
        render_incam: bool = True,
        render_global: bool = True,
+       batch_select: str = "first",
+       batch_select_seed: int = 123,
        use_gt_betas_for_pred: bool = True,
        global_root_relative: bool = False,
        postprocess_global: bool = True,
@@ -46,6 +48,8 @@ class VisUnityVal(pl.Callback):
        self.num_frames = num_frames
        self.render_incam = render_incam
        self.render_global = render_global
+       self.batch_select = str(batch_select or "first").strip().lower()
+       self.batch_select_seed = int(batch_select_seed)
        self.use_gt_betas_for_pred = use_gt_betas_for_pred
        self.global_root_relative = global_root_relative
        self.postprocess_global = postprocess_global
@@ -58,6 +62,41 @@ class VisUnityVal(pl.Callback):
        self._smplx2smpl = None
        self._faces = None
        self._J_regressor = None
+       self._selected_batch_idxs_by_loader = {}
+       self._seen_batch_count_by_loader = {}
+
+   def on_validation_epoch_start(self, trainer, pl_module):
+       self._selected_batch_idxs_by_loader = {}
+       self._seen_batch_count_by_loader = {}
+       if not self.enabled:
+           return
+       if trainer.global_rank != 0:
+           return
+       if self.every_n_epochs is not None and (trainer.current_epoch % self.every_n_epochs) != 0:
+           return
+
+       # Try to deterministically select which batches to render for each val dataloader.
+       try:
+           num_val_batches = getattr(trainer, "num_val_batches", None)
+           if num_val_batches is None:
+               return
+           if isinstance(num_val_batches, int):
+               num_val_batches = [num_val_batches]
+           for dl_idx, n in enumerate(list(num_val_batches)):
+               n = int(n)
+               if n <= 0:
+                   self._selected_batch_idxs_by_loader[dl_idx] = set()
+                   continue
+               k = min(int(self.num_batches), n)
+               if self.batch_select == "random":
+                   rng = np.random.default_rng(int(self.batch_select_seed) + int(trainer.current_epoch) * 1000 + int(dl_idx))
+                   chosen = rng.choice(np.arange(n, dtype=np.int64), size=k, replace=False)
+                   self._selected_batch_idxs_by_loader[dl_idx] = set(int(x) for x in chosen.tolist())
+               else:
+                   self._selected_batch_idxs_by_loader[dl_idx] = set(range(k))
+       except Exception:
+           # Fallback: keep legacy behavior (first N batches).
+           self._selected_batch_idxs_by_loader = {}

    def _lazy_init_models(self, device: torch.device):
        if self._smplx is None:
@@ -118,8 +157,19 @@ class VisUnityVal(pl.Callback):
            return
        if self.every_n_epochs is not None and (trainer.current_epoch % self.every_n_epochs) != 0:
            return
-
-
+       dl_i = int(dataloader_idx)
+       local_idx = int(self._seen_batch_count_by_loader.get(dl_i, 0))
+       self._seen_batch_count_by_loader[dl_i] = local_idx + 1
+
+       selected = self._selected_batch_idxs_by_loader.get(dl_i, None)
+       if selected is None:
+           # Fallback: legacy behavior.
+           if batch_idx >= self.num_batches:
+               return
+       else:
+           # Use loader-local index (CombinedLoader may provide a global `batch_idx`).
+           if local_idx not in selected:
+               return

        if outputs is None or "pred_smpl_params_incam" not in outputs:
            Log.warning("[VisUnityVal] Missing `pred_smpl_params_incam` in outputs; skipping.")
@@ -128,33 +178,31 @@ class VisUnityVal(pl.Callback):
        meta_render = None
        if "meta_render" in batch and isinstance(batch["meta_render"], list) and batch["meta_render"]:
            meta_render = batch["meta_render"][0]
-
-       if not img_paths:
-           Log.warning("[VisUnityVal] Missing `meta_render.img_paths`; skipping incam rendering.")
-           return
+       # NOTE: Do not depend on image/video I/O for validation visualization; render on black.

        vid = batch["meta"][0].get("vid", f"b{batch_idx:03d}")
        vid = self._safe_vid(str(vid))

        # Pick frames to render (within the already-sliced/padded window).
-       L = int(batch["K_fullimg"].shape[1]) if "K_fullimg" in batch else
+       L = int(batch["K_fullimg"].shape[1]) if "K_fullimg" in batch else 0
        if L <= 0:
            return
        num_frames = min(self.num_frames, L)
        frame_idxs = np.linspace(0, L - 1, num_frames).round().astype(int)

-       # Read one frame to get size.
-       first = cv2.imread(img_paths[int(frame_idxs[0])])
-       if first is None:
-           Log.warning(f"[VisUnityVal] Failed to read image: {img_paths[int(frame_idxs[0])]}")
-           return
-       height, width = first.shape[:2]
-
        device = pl_module.device
        self._lazy_init_models(device)

-       #
+       # Render on black; infer output size from principal point (usually near W/2, H/2).
        K = batch["K_fullimg"][0, 0].to(device)
+       try:
+           cx = float(K[0, 2].detach().cpu().item())
+           cy = float(K[1, 2].detach().cpu().item())
+           width = max(64, int(round(cx * 2.0)))
+           height = max(64, int(round(cy * 2.0)))
+       except Exception:
+           width, height = 1280, 720
+
        renderer_incam = Renderer(width, height, device=device, faces=self._faces, K=K)
        # Make the overlay look "flat colored" (no Phong shading).
        try:
@@ -190,10 +238,7 @@ class VisUnityVal(pl.Callback):
                renderer_incam.set_intrinsic(K_fi)
            except Exception:
                pass
-
-           if frame_bgr is None:
-               continue
-           frame = frame_bgr[..., ::-1]  # RGB
+           frame = np.zeros((height, width, 3), dtype=np.uint8)  # RGB black
            img = renderer_incam.render_mesh(gt_verts_incam[i], frame, colors=self.gt_color)
            img = renderer_incam.render_mesh(pred_verts_incam[i], img, colors=self.pred_color)
            writer.write_frame(img.astype(np.uint8))
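A standalone illustration of the sizing heuristic above (the numbers are hypothetical; the only assumption is that the principal point sits near the image center):

import torch

K = torch.tensor([[1000.0, 0.0, 640.0],
                  [0.0, 1000.0, 360.0],
                  [0.0, 0.0, 1.0]])
width = max(64, int(round(float(K[0, 2]) * 2.0)))    # -> 1280
height = max(64, int(round(float(K[1, 2]) * 2.0)))   # -> 720
frame = torch.zeros(height, width, 3, dtype=torch.uint8)  # black canvas, no disk reads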
genmo/utils/vis/renderer.py
@@ -291,8 +291,13 @@ class Renderer:
            verts_features = colors.to(device=vertices.device, dtype=vertices.dtype)
            colors = [0.8, 0.8, 0.8]
        else:
-           if colors[0] > 1:
-               colors = [c / 255.0 for c in colors]
+           # Accept either [0..1] floats or [0..255] uint8-like colors.
+           # Don't key off `colors[0]` because valid RGB like green [0,255,0] would fail.
+           try:
+               if max(colors) > 1:
+                   colors = [c / 255.0 for c in colors]
+           except Exception:
+               pass
            verts_features = (
                torch.tensor(colors)
                .reshape(1, 1, 3)
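Why the guard matters: with the old check on colors[0], a pure-hue color such as green [0, 255, 0] starts with 0 and skipped normalization, rendering near-black. A minimal standalone version of the new behavior (the same fix is mirrored in third_party/GVHMR/hmr4d/utils/vis/renderer.py below):

def normalize_colors(colors):
    # Maps [0..255] uint8-style colors to [0..1]; leaves float colors untouched.
    try:
        if max(colors) > 1:
            colors = [c / 255.0 for c in colors]
    except Exception:
        pass
    return colors

assert normalize_colors([0, 255, 0]) == [0.0, 1.0, 0.0]      # uint8-style green
assert normalize_colors([0.8, 0.8, 0.8]) == [0.8, 0.8, 0.8]  # already in [0..1]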
scripts/train.py
@@ -1,5 +1,21 @@
-import builtins
 import os
+import sys
+
+# Ensure repo root is importable when running as `python scripts/train.py`.
+# Without this, `genmo.*` may resolve from site-packages while `third_party.*`
+# (a namespace package in this repo) fails to import, which Hydra reports as
+# "Error locating target ...".
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+if _REPO_ROOT not in sys.path:
+    sys.path.insert(0, _REPO_ROOT)
+
+# GVHMR uses absolute imports like `import hmr4d...` internally, so its repo root
+# must also be importable.
+_GVHMR_ROOT = os.path.join(_REPO_ROOT, "third_party", "GVHMR")
+if os.path.isdir(_GVHMR_ROOT) and _GVHMR_ROOT not in sys.path:
+    sys.path.insert(0, _GVHMR_ROOT)
+
+import builtins
 from datetime import datetime

 import hydra
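The same shim can be replicated in a notebook or REPL before importing repo modules; this sketch assumes the current working directory is a checkout of this repo:

import os, sys

for p in (os.getcwd(), os.path.join(os.getcwd(), "third_party", "GVHMR")):
    if os.path.isdir(p) and p not in sys.path:
        sys.path.insert(0, p)  # prefer the checkout over site-packages

import genmo   # resolves from the checkout
import hmr4d   # GVHMR's absolute imports now work too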
third_party/GVHMR/hmr4d/utils/vis/renderer.py
@@ -223,8 +223,13 @@ class Renderer:
            verts_features = colors.to(device=vertices.device, dtype=vertices.dtype)
            colors = [0.8, 0.8, 0.8]
        else:
-           if colors[0] > 1:
-               colors = [c / 255.0 for c in colors]
+           # Accept either [0..1] floats or [0..255] uint8-like colors.
+           # Don't key off `colors[0]` because valid RGB like green [0,255,0] would fail.
+           try:
+               if max(colors) > 1:
+                   colors = [c / 255.0 for c in colors]
+           except Exception:
+               pass
            verts_features = torch.tensor(colors).reshape(1, 1, 3).to(device=vertices.device, dtype=vertices.dtype)
            verts_features = verts_features.repeat(1, vertices.shape[1], 1)
            textures = TexturesVertex(verts_features=verts_features)
third_party/GVHMR/process_data.sh
@@ -1 +1 @@
-python tools/demo/process_dataset.py --input /mnt/c/Temp/SyntheticDataset --output ./
+python tools/demo/process_dataset.py --input /mnt/c/Temp/SyntheticDataset --output ./processed_dataset --genmo --debug
third_party/GVHMR/tools/demo/process_dataset.py
@@ -1,879 +1,1760 @@
import sys
import os
import json
import argparse
import numpy as np
import zlib
from glob import glob
from tqdm import tqdm
import cv2
import torch
from scipy.spatial.transform import Rotation as R
import time
import shutil
from pathlib import Path

# --- SETUP PATHS FOR IMPORTS ---
-
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))
-
-
-
-
-
-
-
-
-
-
-    print("WARNING: Could not import Extractor. Feature extraction will fail.")
-    Extractor = None
-
-# Force single thread for libraries
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
cv2.setNumThreads(0)
torch.set_num_threads(1)

FPS = 30.0
-
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

-

def _process_image_memory(img_bgr, bbox_xywh, img_size=256):
-
-
-    """
-    if img_bgr is None:
-        return np.zeros((3, img_size, img_size), dtype=np.float32)

    x, y, w, h = bbox_xywh
    cx, cy = x + w/2, y + h/2
-
-
-
-    # Standard HMR extraction uses a specific scale factor.
-    # Here we assume the input bbox is the "tight" bbox and we need to square it.
-    scale = max(w, h) * 1.2
-
    H, W = img_bgr.shape[:2]
    max_side = float(max(H, W, 1))

-    if scale <= 1.0 or scale > max_side * 20.0:
-
-        scale = max_side * 0.5
-
    half = scale / 2.0
    x0, y0 = int(cx - half), int(cy - half)
    x1, y1 = int(cx + half), int(cy + half)

    pad_l, pad_t = max(0, -x0), max(0, -y0)
    pad_r, pad_b = max(0, x1 - W), max(0, y1 - H)

-    if max(pad_l, pad_t, pad_r, pad_b) > int(max_side * 4.0):
-        # Sanity check fail, return black
-        return np.zeros((3, img_size, img_size), dtype=np.float32)

    if pad_l or pad_t or pad_r or pad_b:
        img_bgr = cv2.copyMakeBorder(img_bgr, pad_t, pad_b, pad_l, pad_r, cv2.BORDER_CONSTANT, value=(0,0,0))
        x0 += pad_l; y0 += pad_t; x1 += pad_l; y1 += pad_t

    crop = img_bgr[y0:y1, x0:x1]
-
-    if crop.size == 0:
-
-
    if crop.shape[0] != img_size or crop.shape[1] != img_size:
        crop = cv2.resize(crop, (img_size, img_size), interpolation=cv2.INTER_LINEAR)

-
-
    crop = (crop - IMAGENET_MEAN) / IMAGENET_STD
-

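For context, this helper's output is consumed in main() further down: square, ImageNet-normalized crops are stacked into large batches for the ViT extractor. A condensed sketch (samples and vit_model stand in for the per-frame loop state):

crops = [_process_image_memory(frame, bbox, img_size=256) for frame, bbox in samples]
batch_t = torch.from_numpy(np.stack(crops)).to("cuda", non_blocking=True)
with torch.inference_mode(), torch.amp.autocast("cuda"):
    feats = vit_model({"img": batch_t})   # one forward pass per batch, not per frame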
def _alpha_blend_bgra_onto_bgr(dst_bgr, src_bgra, x, y):
    if dst_bgr is None or src_bgra is None: return dst_bgr
    H, W = dst_bgr.shape[:2]
    h, w = src_bgra.shape[:2]
    if w <= 0 or h <= 0: return dst_bgr
    x0, y0 = max(int(x), 0), max(int(y), 0)
    x1, y1 = min(int(x + w), W), min(int(y + h), H)
    if x1 <= x0 or y1 <= y0: return dst_bgr
-
    roi = dst_bgr[y0:y1, x0:x1]
    src_crop = src_bgra[(y0 - int(y)):(y0 - int(y)) + (y1 - y0), (x0 - int(x)):(x0 - int(x)) + (x1 - x0)]
-
    if src_crop.shape[2] == 3:
        roi[:] = src_crop
        return dst_bgr

    alpha = src_crop[:, :, 3].astype(np.uint16)
    inv_alpha = 255 - alpha
    b_src, g_src, r_src = src_crop[:, :, 0], src_crop[:, :, 1], src_crop[:, :, 2]
    b_dst, g_dst, r_dst = roi[:, :, 0], roi[:, :, 1], roi[:, :, 2]

    roi[:, :, 0] = ((b_src * alpha + b_dst * inv_alpha) >> 8).astype(np.uint8)
    roi[:, :, 1] = ((g_src * alpha + g_dst * inv_alpha) >> 8).astype(np.uint8)
    roi[:, :, 2] = ((r_src * alpha + r_dst * inv_alpha) >> 8).astype(np.uint8)
    return dst_bgr

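The >> 8 in the blend divides by 256 instead of the exact 255; the result is off by at most one intensity level, invisible for UI overlays, and keeps everything in integer math. For example:

import numpy as np

alpha, src, dst = np.uint16(255), np.uint16(200), np.uint16(50)
print((src * alpha + dst * (255 - alpha)) >> 8)   # 199; exact compositing gives 200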
def _find_ui_dir():
    cand = os.path.join(os.getcwd(), "UI")
    if os.path.isdir(cand): return cand
-
-
-
-

def _find_font_path(ui_dir, filename="Inter_18pt-Bold.ttf"):
    if not ui_dir: return None
    p = os.path.join(ui_dir, filename)
    return p if os.path.isfile(p) else None

def _load_ui_images(ui_dir):
    if not ui_dir or (not os.path.isdir(ui_dir)): return []
    imgs = []
    for name in sorted(os.listdir(ui_dir)):
        p = os.path.join(ui_dir, name)
        if not os.path.isfile(p): continue
        if name.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
            im = cv2.imread(p, cv2.IMREAD_UNCHANGED)
            if im is not None:
                if im.ndim == 2: im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
                imgs.append(im)
    return imgs

class SimpleUIOverlay:
    def __init__(self, width, height, seed=0, ui_dir=None, max_images=4, show_prob=0.6, min_hold_frames=20, max_hold_frames=120):
        self.W, self.H = int(width), int(height)
        self.rng = np.random.default_rng(int(seed))
        self.max_images = max(0, int(max_images))
        self.show_prob = float(show_prob)
        self.min_hold_frames, self.max_hold_frames = max(1, int(min_hold_frames)), max(1, int(max_hold_frames))
        self.ui_dir = ui_dir if ui_dir else _find_ui_dir()
        self.assets = _load_ui_images(self.ui_dir)
        self._ttl, self._active = 0, []

    def _pick_new_state(self):
        self._ttl = int(self.rng.integers(self.min_hold_frames, self.max_hold_frames + 1))
        self._active = []
        if (not self.assets) or (self.max_images <= 0): return
        if float(self.rng.random()) > self.show_prob: return
        k = min(int(self.rng.integers(1, self.max_images + 1)), len(self.assets))
        idxs = self.rng.choice(len(self.assets), size=k, replace=False)
        for idx in idxs:
            im = self.assets[int(idx)]
            h, w = im.shape[:2]
            if w > 0 and h > 0:
                x = int(self.rng.integers(-w // 4, max(1, self.W - (3 * w // 4))))
                y = int(self.rng.integers(-h // 4, max(1, self.H - (3 * h // 4))))
                self._active.append((im, x, y))

    def draw(self, img_bgr):
        if img_bgr is None: return img_bgr
        if self._ttl <= 0: self._pick_new_state()
        self._ttl -= 1
-
-
        return img_bgr

class SimpleChatOverlay:
    def __init__(self, width, height, seed=0, num_lines=7, region_w=420, region_h=180, margin=18, every_n_frames=15, corner=None, font_path=None):
        from collections import deque
        self.W, self.H = int(width), int(height)
        self.rng = np.random.default_rng(int(seed))
        self.num_lines, self.margin, self.every_n_frames = int(num_lines), int(margin), max(1, int(every_n_frames))
        self.region_w, self.region_h = int(region_w), int(region_h)
        self.font_path = font_path
        self._pil_fonts = {}
        self.corner = str(corner) if corner else str(self.rng.choice(["tl", "tr", "bl", "br"]))
        self.messages = deque(maxlen=self.num_lines)
        for _ in range(self.num_lines): self.messages.append(self._random_message())
        self._cached_overlay, self._dirty = None, True

    def _random_message(self):
        user = str(self.rng.choice(["nightbot", "viewer", "catjam", "shadow", "speedrunner", "chattycathy", "kappaking"]))
        if self.rng.random() < 0.5: user += str(self.rng.integers(10, 999))
        text = str(self.rng.choice(["pog", "lol", "gg", "nice", "W", "L", "no shot", "crazy", "clip it", "cooking", "unlucky"]))
        color = tuple(int(x) for x in self.rng.choice([(255, 120, 0), (0, 180, 255), (255, 0, 180), (0, 255, 120)]))
        return {"user": user, "text": text, "color": color}

    def _get_pil_font(self, size_px):
        if not self.font_path: return None
        if size_px in self._pil_fonts: return self._pil_fonts[size_px]
        try:
            from PIL import ImageFont
-
-            self.
-
        except: return None

    def maybe_append(self, frame_idx):
        if int(frame_idx) % self.every_n_frames == 0:
            self.messages.append(self._random_message())
            self._dirty = True

    def _render_cache(self):
        rw = min(self.region_w, max(40, self.W - 2 * self.margin))
        rh = min(self.region_h, max(40, self.H - 2 * self.margin))
        pil_font = self._get_pil_font(int(round(np.clip(20.0 * (self.H / 720.0), 14.0, 30.0))))
        if pil_font is None:
-
-            return
        try:
            from PIL import Image, ImageDraw
            pil = Image.new("RGBA", (rw, rh), (0, 0, 0, 0))
            draw = ImageDraw.Draw(pil)
-
            lines = list(self.messages)[-min(self.num_lines, max(1, rh // line_h)):]
            local_y = rh - line_h if self.corner in ("bl", "br") else 0
            for msg in lines:
                user = f"{msg['user']}: "
-
                tw = draw.textlength(user, font=pil_font)
                draw.text((tw, local_y), msg['text'], font=pil_font, fill=(240, 240, 240))
                local_y += (-line_h if self.corner in ("bl", "br") else line_h)
            self._cached_overlay = cv2.cvtColor(np.asarray(pil), cv2.COLOR_RGBA2BGRA)
        except: self._cached_overlay = None

    def draw(self, img_bgr):
        if img_bgr is None: return img_bgr
-
-
-
        if self._cached_overlay is not None:
            rw = min(self.region_w, max(40, self.W - 2 * self.margin))
            rh = min(self.region_h, max(40, self.H - 2 * self.margin))
            if self.corner == "tl": x, y = self.margin, self.margin
            elif self.corner == "tr": x, y = self.W - self.margin - rw, self.margin
            elif self.corner == "bl": x, y = self.margin, self.H - self.margin - rh
            else: x, y = self.W - self.margin - rw, self.H - self.margin - rh
            _alpha_blend_bgra_onto_bgr(img_bgr, self._cached_overlay, x, y)
        return img_bgr

-
-

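Both overlays are seeded per sequence in main() via crc32 of the sequence name, so the augmentation is reproducible across reruns. A condensed sketch of the per-frame usage (dimensions are hypothetical):

import numpy as np, zlib

seq_seed = int(zlib.crc32("0001".encode("utf-8")) & 0xFFFFFFFF)
frame = np.zeros((720, 1280, 3), dtype=np.uint8)
chat = SimpleChatOverlay(1280, 720, seed=seq_seed, num_lines=7, font_path=None)
ui = SimpleUIOverlay(1280, 720, seed=seq_seed, max_images=3, show_prob=0.25)
for idx in range(90):
    chat.maybe_append(idx)         # new chat line every every_n_frames
    frame = chat.draw(frame)       # cached-overlay blit, no per-frame PIL work
    frame = ui.draw(frame)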
def bbox_xywh_to_bbx_xys(bbox_xywh, base_enlarge=1.0):
    x, y, w, h = [float(v) for v in bbox_xywh]
    return np.array([x + 0.5 * w, y + 0.5 * h, max(w, h) * float(base_enlarge)], dtype=np.float32)

def clamp_bbox_xywh_to_image(bbox_xywh, W, H, min_size=1.0):
    x, y, w, h = [float(v) for v in bbox_xywh]
    W, H = float(W), float(H)
    if W <= 0 or H <= 0: return [0.0, 0.0, 0.0, 0.0]
    x2, y2 = x + w, y + h
    x1c = float(np.clip(x, 0.0, max(0.0, W - 1.0)))
    y1c = float(np.clip(y, 0.0, max(0.0, H - 1.0)))
    x2c = float(np.clip(x2, 0.0, W))
    y2c = float(np.clip(y2, 0.0, H))
    if x2c <= x1c: x2c = min(W, x1c + float(min_size))
    if y2c <= y1c: y2c = min(H, y1c + float(min_size))
    wc = max(0.0, x2c - x1c)
    hc = max(0.0, y2c - y1c)
    return [x1c, y1c, wc, hc]

def draw_bbox_xywh_and_center(img_bgr, bbox_xywh, color=(255, 255, 0)):
    x, y, w, h = [float(v) for v in bbox_xywh]
    cv2.rectangle(img_bgr, (int(x), int(y)), (int(x+w), int(y+h)), color, 2)
    cv2.circle(img_bgr, (int(x+w/2), int(y+h/2)), 4, (0, 0, 255), -1)

def vis_label_and_color(v: int):
    if v == 2: return "VIS", (0, 255, 0)
    if v == 1: return "OCC", (0, 165, 255)
    return "OFF", (160, 160, 160)

def draw_vis_text_and_points(img_bgr, kpts2d_xy, vis17):
    for k in range(17):
        v = int(vis17[k])
        label, color = vis_label_and_color(v)
        x, y = int(round(kpts2d_xy[k, 0])), int(round(kpts2d_xy[k, 1]))
        if v > 0: cv2.circle(img_bgr, (x, y), 4, color, -1)
        cv2.putText(img_bgr, f"{k}:{label}", (x + 6, y - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1, cv2.LINE_AA)

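Two quick worked examples of the bbox helpers above:

print(bbox_xywh_to_bbx_xys([100.0, 50.0, 80.0, 160.0]))
# [140. 130. 160.]  -- center (cx, cy) plus the max-side square size

print(clamp_bbox_xywh_to_image([-20.0, 10.0, 100.0, 100.0], 64, 64))
# [0.0, 10.0, 64.0, 54.0]  -- clipped to the 64x64 image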
def build_T_wc(pos_world, quat_world_xyzw):
    T = np.eye(4, dtype=np.float64)
    T[:3, :3] = R.from_quat(np.asarray(quat_world_xyzw, dtype=np.float64)).as_matrix()
    T[:3, 3] = np.asarray(pos_world, dtype=np.float64)
    return T

def compute_velocity(mats, fps=30.0):
    N = len(mats)
    if N < 2: return np.zeros((N, 3), dtype=np.float32), np.zeros((N, 3), dtype=np.float32)
    R_curr = mats[:, :3, :3]
    R_diff = np.matmul(R_curr[1:], np.transpose(R_curr[:-1], (0, 2, 1)))
    rv = R.from_matrix(R_diff).as_rotvec()
    angvel = np.zeros((N, 3), dtype=np.float32)
    angvel[1:] = rv
    t_curr = mats[:, :3, 3]
    tvel = np.zeros((N, 3), dtype=np.float32)
    tvel[1:] = t_curr[1:] - t_curr[:-1]
    return angvel.astype(np.float32), tvel.astype(np.float32)

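compute_velocity differences consecutive poses: R_diff = R_t @ R_{t-1}^T is the frame-to-frame relative rotation, and its rotation vector is the angular velocity in radians per frame (multiply by fps for rad/s); translation velocity is a plain first difference. A small sanity check:

import numpy as np
from scipy.spatial.transform import Rotation as R

mats = np.tile(np.eye(4), (3, 1, 1))
for t in range(3):   # constant 2 deg/frame yaw, 1 cm/frame along x
    mats[t, :3, :3] = R.from_euler("y", 2 * t, degrees=True).as_matrix()
    mats[t, 0, 3] = 0.01 * t
angvel, tvel = compute_velocity(mats, fps=30.0)
print(np.degrees(angvel[1:, 1]))   # ~[2., 2.]
print(tvel[1:, 0])                 # ~[0.01, 0.01]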
def _compute_vitpose_selected_indices(num_frames, fps, bucket_seconds, frames_per_bucket, sampling="uniform", seed=123):
    if num_frames <= 0: return []
    rng = np.random.default_rng(int(seed))
    selected = []
    bucket_len = max(1, int(round(float(bucket_seconds) * float(fps))))
    b_start = 0
    while b_start < num_frames:
        b_end = min(num_frames, b_start + bucket_len)
        k = min(int(frames_per_bucket), b_end - b_start)
        if k > 0:
-
-
-
-
-
            if k == 1: idxs = [b_start + (b_end - b_start) // 2]
-
-
-
            selected.extend(idxs)
        b_start = b_end
    return sorted(list(set(selected)))

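With one frame per bucket, the k == 1 branch shown above picks bucket midpoints; e.g. 60 s at 30 fps with 12 s buckets gives five midpoints (the multi-frame sampling branches are removed lines in this diff, so the sketch only exercises k == 1):

idxs = _compute_vitpose_selected_indices(1800, 30.0, 12.0, 1)
print(idxs)   # [180, 540, 900, 1260, 1620]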
_SMPLX_MODEL = None
_SMPLX_DEVICE = None

def _get_smplx_model(device):
    global _SMPLX_MODEL, _SMPLX_DEVICE
-
-
    from hmr4d.utils.smplx_utils import make_smplx
    _SMPLX_MODEL = make_smplx("supermotion").to(device).eval()
    _SMPLX_DEVICE = device
    return _SMPLX_MODEL

-
class SmplIncamRenderer:
    def __init__(self, width, height, K4, device="cuda", smplx2smpl_path="hmr4d/utils/body_model/smplx2smpl_sparse.pt"):
        from hmr4d.utils.smplx_utils import make_smplx
        from hmr4d.utils.vis.renderer import Renderer
        self.torch = torch
        self.device = device
        self.smplx = make_smplx("supermotion").to(device).eval()
-
-        self.faces = None
        try:
            self.smplx2smpl = torch.load(smplx2smpl_path).to(device)
            self.faces = make_smplx("smpl").faces
        except: self.faces = self.smplx.faces
        self.K_torch = torch.from_numpy(k4_to_K3(K4)).to(device)
        self.renderer = Renderer(width, height, device=device, faces=self.faces, K=self.K_torch)

    @torch.no_grad()
    def render(self, img_rgb_uint8, global_orient_aa, body_pose_aa, betas_10, transl_xyz, fl, pp):
        K3_torch = torch.from_numpy(np.array([[fl[0], 0, pp[0]], [0, fl[1], pp[1]], [0, 0, 1]], dtype=np.float32)).to(self.device)
        self.renderer.set_intrinsic(K3_torch)
-
-
-
-
-        }
-        out = self.smplx(**params)
-        verts = out.vertices[0]
        if self.smplx2smpl is not None and verts.dim() == 2: verts = torch.matmul(self.smplx2smpl, verts)
        img_out = self.renderer.render_mesh(verts, img_rgb_uint8, [0.8, 0.8, 0.8])
        return img_out
-
    @torch.no_grad()
    def get_verts(self, global_orient_aa, body_pose_aa, betas_10, transl_xyz):
-
-
-
-
-
-
-        verts = out.vertices[0]
-        if self.smplx2smpl is not None and verts.dim() == 2:
-            verts = torch.matmul(self.smplx2smpl, verts)
        return verts

def _as_betas10(betas_any) -> np.ndarray:
    betas = np.asarray(betas_any, dtype=np.float32).reshape(-1)
-
-    n = min(10, betas.size)
    if n > 0: betas10[:n] = betas[:n]
    return betas10

def load_betas10_from_npz(npz_path, key="betas", index=None):
-
-
    if arr.ndim == 0: arr = np.asarray(arr).reshape(1)
    if arr.ndim == 1: betas = arr
-
-
-
    else: raise ValueError(f"Bad betas shape: {arr.shape}")
    return _as_betas10(betas)

-def _default_shape_npz_path() -> str:
-

def parse_smpl_inputs_from_row(row, override_betas10=None, keep_unity_scale=False, transl_source="pelvis", transl_y_offset_m=0.0):
    C = np.diag([1.0, -1.0, 1.0]).astype(np.float64)
    cam_rot_w_quat = np.array(row["cam_rot_world"], dtype=np.float64)
    R_cam_w = R.from_quat(cam_rot_w_quat).as_matrix()
    pel_rot_w_quat = np.array(row["pelvis_rot_world"], dtype=np.float64)
    R_pel_w = R.from_quat(pel_rot_w_quat).as_matrix()
    R_rel_unity = R_cam_w.T @ R_pel_w
    R_cv = C @ R_rel_unity @ C
-
    global_orient_aa = R.from_matrix(R_final).as_rotvec().astype(np.float32)

    smpl_scale = float(row.get("smpl_root_world_scale", 1.0))
    pelvis_cam_unity = np.asarray(row["smpl_incam_transl"], dtype=np.float64).reshape(3)
    root_cam_unity = np.asarray(row.get("smpl_root_incam_transl", [0.0, 0.0, 0.0]), dtype=np.float64).reshape(3)
    pelvis_cam_unity = pelvis_cam_unity + np.array([0.0, float(transl_y_offset_m), 0.0], dtype=np.float64)

    if str(transl_source).strip().lower() == "root": target_cam_unity = root_cam_unity
    else:
        if bool(keep_unity_scale): target_cam_unity = pelvis_cam_unity
        else:
            if abs(smpl_scale) > 1e-8: target_cam_unity = root_cam_unity + (pelvis_cam_unity - root_cam_unity) / smpl_scale
            else: target_cam_unity = pelvis_cam_unity
    target_cam_cv = (C @ target_cam_unity).astype(np.float64)

    pose = np.asarray(row["smplx_pose"], dtype=np.float32)
    body_pose = pose[3:66].astype(np.float32)
    betas10 = _as_betas10(override_betas10)
-
    return {
        "global_orient": global_orient_aa, "body_pose": body_pose, "betas": betas10,
        "target_cam_cv": target_cam_cv, "cam_rot_w_quat": cam_rot_w_quat,
        "cam_pos_world": np.asarray(row["cam_pos_world"], dtype=np.float64).reshape(3),
        "pelvis_pos_world": np.asarray(row["pelvis_pos_world"], dtype=np.float64).reshape(3),
        "smpl_scale": smpl_scale, "root_cam_unity": root_cam_unity
    }

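The C = diag(1, -1, 1) conjugation converts between Unity's left-handed y-up frame and OpenCV's right-handed y-down camera frame: vectors map as v_cv = C v_unity and rotations as R_cv = C R_unity C (C is its own inverse). A consistency check:

import numpy as np
from scipy.spatial.transform import Rotation as R

C = np.diag([1.0, -1.0, 1.0])
R_unity = R.from_euler("y", 30, degrees=True).as_matrix()
v_unity = np.array([0.1, 1.7, 2.0])
R_cv, v_cv = C @ R_unity @ C, C @ v_unity
# Rotating then converting equals converting then rotating:
assert np.allclose(C @ (R_unity @ v_unity), R_cv @ v_cv)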
def batch_smpl_forward(betas, global_orient, body_pose, device):
    model = _get_smplx_model(device)
    N = len(betas)
-
-    pelvis_list = []
    with torch.no_grad():
        for i in range(0, N, chunk_size):
            b_betas = torch.from_numpy(betas[i:i+chunk_size]).float().to(device)
            b_go = torch.from_numpy(global_orient[i:i+chunk_size]).float().to(device)
            b_bp = torch.from_numpy(body_pose[i:i+chunk_size]).float().to(device)
            b_tr = torch.zeros((len(b_betas), 3), dtype=torch.float32, device=device)
            out = model(betas=b_betas, global_orient=b_go, body_pose=b_bp, transl=b_tr)
            pelvis_list.append(out.joints[:, 0, :].detach().cpu().numpy())
    return np.concatenate(pelvis_list, axis=0)

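batch_smpl_forward evaluates the body model in chunks (the chunk-size line is a removed line above) and keeps only joint 0, the zero-translation pelvis; main() subtracts it from the desired camera-space pelvis position to obtain transl. Expected shapes, as a sketch:

betas = np.zeros((300, 10), dtype=np.float32)
go = np.zeros((300, 3), dtype=np.float32)    # global orient (axis-angle)
bp = np.zeros((300, 63), dtype=np.float32)   # 21 body joints x 3
pelvis0 = batch_smpl_forward(betas, go, bp, device="cuda")
assert pelvis0.shape == (300, 3)             # zero-pose pelvis per frame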
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--output", required=True)
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--vitpose", action="store_true")
-
    parser.add_argument("--dpvo", action="store_true")
    parser.add_argument("--smplx", action="store_true")
    parser.add_argument("--debug_no_coco", action="store_true")
    parser.add_argument("--shape_npz", default=_default_shape_npz_path())
    parser.add_argument("--vitpose_use_all_frames", action="store_true")
    parser.add_argument("--vitpose_bucket_seconds", type=float, default=12.0)
    parser.add_argument("--vitpose_frames_per_bucket", type=int, default=36)
    parser.add_argument("--vitpose_sampling", type=str, default="random")
    parser.add_argument("--vitpose_seed", type=int, default=123)
    parser.add_argument("--ui_dir", type=str, default=None)
    parser.add_argument("--ui_show_prob", type=float, default=0.25)
    parser.add_argument("--ui_max_images", type=int, default=3)
    parser.add_argument("--ui_hold_min_s", type=float, default=0.7)
    parser.add_argument("--ui_hold_max_s", type=float, default=5.0)
    parser.add_argument("--ui_seed", type=int, default=None)
    parser.add_argument("--keep_unity_scale", action="store_true")
    parser.add_argument("--transl_source", type=str, default="pelvis")
    parser.add_argument("--transl_y_offset_m", type=float, default=-0.020)
    parser.add_argument("--world_y_offset_m", type=float, default=1.3415)
    parser.add_argument("--vit_batch_size", type=int, default=512, help="Batch size for in-memory ViT extraction")
    args = parser.parse_args()

    if not (args.vitpose or args.genmo or args.dpvo or args.smplx):
        args.vitpose = args.genmo = args.dpvo = args.smplx = True

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Running STREAMING processing on {device.upper()}...")
-
-
    vit_model = None
    if args.genmo and Extractor is not None:
        print("Initializing ViT Extractor (HMR2)...")
        extractor_wrapper = Extractor(tqdm_leave=False)
        vit_model = extractor_wrapper.extractor
        vit_model.eval()
        vit_model.to(device)

    override_betas10 = load_betas10_from_npz(args.shape_npz, key="betas")
    temp_ann_dir = os.path.join(args.output, "vitpose", "temp_annotations")
    os.makedirs(temp_ann_dir, exist_ok=True)
    jsonl_files = sorted(glob(os.path.join(args.input, "sequence_*.jsonl")))
-
    global_J_reg = None
    j_reg_path = "third_party/GVHMR/inputs/checkpoints/body_models/smpl_neutral_J_regressor.pt"
    if os.path.exists(j_reg_path) and device == "cuda":
        global_J_reg = torch.load(j_reg_path, map_location=device)

    for jsonl_idx, jsonl_path in enumerate(jsonl_files):
        seq_name = os.path.splitext(os.path.basename(jsonl_path))[0].replace("sequence_", "")
        print(f"[{jsonl_idx+1}/{len(jsonl_files)}] Processing {seq_name}...")
-
        prof = {"smpl_batch": 0.0, "video_read": 0.0, "overlay": 0.0, "vit_process": 0.0,
                "sparse_write": 0.0, "loop_total": 0.0, "save_files": 0.0, "debug_rend": 0.0, "prep": 0.0}

        t_start_seq = time.perf_counter()
-
        jsonl_dir = os.path.dirname(os.path.abspath(jsonl_path))
        video_path = os.path.join(jsonl_dir, f"video_{seq_name}.mp4")
        if not os.path.exists(video_path): video_path = os.path.join(jsonl_dir, "video.mp4")

-
        out_img_folder = os.path.join(args.output, "images", seq_name)
        os.makedirs(out_img_folder, exist_ok=True)
-
-

        with open(jsonl_path, "r") as f: lines = f.readlines()
        lines = lines[1:] if len(lines) > 0 else []
        num_frames = len(lines)
        if num_frames <= 0: continue

        genmo_out = os.path.join(args.output, "genmo_features", f"{seq_name}.pt")
        smplx_out = os.path.join(args.output, "smplx_incam", f"{seq_name}_smplx.npz")
        smplx_global_out = os.path.join(args.output, "smplx_global", f"{seq_name}_global.npz")
        dpvo_dir = os.path.join(args.output, "dpvo", seq_name)
        for p in [genmo_out, smplx_out, smplx_global_out, dpvo_dir]:
            if p: os.makedirs(os.path.dirname(p), exist_ok=True)

        selected_set = set()
        if args.vitpose:
            if args.vitpose_use_all_frames: selected_indices = list(range(num_frames))
            else:
                selected_indices = _compute_vitpose_selected_indices(
                    num_frames, FPS, args.vitpose_bucket_seconds,
                    args.vitpose_frames_per_bucket, args.vitpose_sampling, args.vitpose_seed
                )
            selected_set = set(selected_indices)

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened(): continue
        W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
        resolved_ui_dir = args.ui_dir if args.ui_dir else _find_ui_dir()
        chat_font_path = _find_font_path(resolved_ui_dir)
        seq_seed = int(zlib.crc32(seq_name.encode("utf-8")) & 0xFFFFFFFF)
        chat_aug = SimpleChatOverlay(W, H, seed=seq_seed, num_lines=7, font_path=chat_font_path)
-
                                 max_images=args.ui_max_images, show_prob=args.ui_show_prob)

        # --- BATCH SMPL (GPU) ---
        t0_smpl = time.perf_counter()
-
        smpl_precalc_data = []
        debug_global_verts_cpu = []
        parsed_rows = []
-
        for line in lines:
            row = json.loads(line)
            parsed_rows.append(parse_smpl_inputs_from_row(row, override_betas10, args.keep_unity_scale, args.transl_source, args.transl_y_offset_m))
-
        all_betas = np.stack([d['betas'] for d in parsed_rows])
        all_go = np.stack([d['global_orient'] for d in parsed_rows])
        all_bp = np.stack([d['body_pose'] for d in parsed_rows])
-
        all_pelvis0 = batch_smpl_forward(all_betas, all_go, all_bp, device=device)
-
        C = np.diag([1.0, -1.0, 1.0]).astype(np.float64)
-
        all_go_w, all_pelvis_pos_w_cv = [], []
-
        for i, d in enumerate(parsed_rows):
            R_cam_w_unity = R.from_quat(d['cam_rot_w_quat']).as_matrix()
-
            R_pelvis_c_cv = R.from_rotvec(d['global_orient'].astype(np.float64)).as_matrix()
-
-
-
-            model_gpu = _get_smplx_model(device)
-            with torch.no_grad():
-                out_fix = model_gpu(betas=torch.from_numpy(d['betas'][None]).float().to(device),
-                                    global_orient=torch.from_numpy(go_w0[None]).float().to(device),
-                                    body_pose=torch.from_numpy(d['body_pose'][None]).float().to(device))
-            joints = out_fix.joints[0].detach().cpu().numpy().astype(np.float64)
-            pelvis_y = float(joints[0, 1])
-            head_y = float(joints[15, 1]) if joints.shape[0] > 15 else pelvis_y
-            if head_y < pelvis_y: world_fix_R = R.from_euler("x", 180, degrees=True).as_matrix().astype(np.float64)
-            else: world_fix_R = np.eye(3, dtype=np.float64)
-            world_fix_R4 = np.eye(4, dtype=np.float64)
-            world_fix_R4[:3, :3] = world_fix_R
-            world_fix_R4_inv = np.eye(4, dtype=np.float64); world_fix_R4_inv[:3, :3] = world_fix_R.T
-
-            R_pelvis_w_cv = world_fix_R @ R_pelvis_w_cv_raw
            all_go_w.append(R.from_matrix(R_pelvis_w_cv).as_rotvec().astype(np.float32))

            pelvis_pos_w_unity = d['pelvis_pos_world']
            root_pos_w_unity = (R_cam_w_unity @ d['root_cam_unity'] + d['cam_pos_world']).reshape(3)
            smpl_scale = d['smpl_scale']
            transl_source_local = str(args.transl_source).strip().lower()
            if transl_source_local == "root": target_pos_w_unity = root_pos_w_unity
            else:
                if bool(args.keep_unity_scale): target_pos_w_unity = pelvis_pos_w_unity
                else:
                    if abs(smpl_scale) > 1e-8: target_pos_w_unity = root_pos_w_unity + (pelvis_pos_w_unity - root_pos_w_unity) / smpl_scale
                    else: target_pos_w_unity = pelvis_pos_w_unity
-           pelvis_pos_w_cv = (C @ target_pos_w_unity).astype(np.float64)
-           pelvis_pos_w_cv = (world_fix_R @ pelvis_pos_w_cv.reshape(3, 1)).reshape(3)
-           all_pelvis_pos_w_cv.append(pelvis_pos_w_cv)

-
-

-       for i in range(num_frames):
-           d = parsed_rows[i]
-           transl_c = (d['target_cam_cv'] - all_pelvis0[i]).astype(np.float32)
-           if str(args.transl_source) == "root": transl_w = all_pelvis_pos_w_cv[i].astype(np.float32)
            else: transl_w = (all_pelvis_pos_w_cv[i] - all_pelvis0_w[i]).astype(np.float32)
            smpl_precalc_data.append({
                "go_c": d['global_orient'], "bp": d['body_pose'], "beta": d['betas'], "tr_c": transl_c,
-
            })
-
        prof["smpl_batch"] = time.perf_counter() - t0_smpl
-
        t0_gap = time.perf_counter()
        smpl_renderer = None
        vid_incam, vid_global = None, None
        debug_end_frame = min(num_frames, DEBUG_NUM_FRAMES)
        if args.debug:
            os.makedirs(os.path.join(args.output, "debug_renders"), exist_ok=True)
            if debug_end_frame > 0:
                try:
-
                    K4_init = np.asarray(json.loads(lines[0])["cam_intrinsics"], dtype=np.float32)
                    smpl_renderer = SmplIncamRenderer(W, H, K4_init, device=device)
                    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                    vid_incam = cv2.VideoWriter(os.path.join(args.output, "debug_renders", f"{seq_name}_incam.mp4"), fourcc, FPS, (W, H))
-
                except: pass

        # --- MAIN LOOP ---
-
-       img_paths = []
-
        cam_T_wc_cv_all, cam_T_w2c_cv_all = [], []
        dpvo_poses, dpvo_intrinsics = [], []
-
        global_orient_c_all, transl_c_all, body_pose_all, betas_all = [], [], [], []
        global_orient_w_all, transl_w_all = [], []
-       C4 = np.diag([1.0, -1.0, 1.0, 1.0]).astype(np.float64)

-
-
-

        ret, _ = cap.read()  # skip 0
        prof["prep"] = time.perf_counter() - t0_gap

        t_start_loop = time.perf_counter()
        for idx in tqdm(range(num_frames), desc="Frames", leave=False):
            t0_read = time.perf_counter()
            ret, img_bgr = cap.read()
            prof["video_read"] += (time.perf_counter() - t0_read)
            if not ret: break
-
            img_filename = f"img_{idx:05d}.jpg"
            img_abs_path = os.path.join(out_img_folder, img_filename)
-
-
            t0_ov = time.perf_counter()
            chat_aug.maybe_append(idx)
            chat_aug.draw(img_bgr)
            ui_aug.draw(img_bgr)
            prof["overlay"] += (time.perf_counter() - t0_ov)

-
-
            K4 = np.asarray(row["cam_intrinsics"], dtype=np.float32)
            kpts_raw = np.asarray(row["kpts_2d"], dtype=np.float32).reshape(-1, 2)[:17]
            vis_raw = np.asarray(row["kpts_vis"], dtype=np.int32)[:17]
-
            bbox = clamp_bbox_xywh_to_image(row["bbox"], W, H)
-
            sd = smpl_precalc_data[idx]
            global_orient_c_all.append(sd['go_c'])
            transl_c_all.append(sd['tr_c'])
            global_orient_w_all.append(sd['go_w'])
            transl_w_all.append(sd['tr_w'])
            body_pose_all.append(sd['bp'])
            betas_all.append(sd['beta'])
-
            bboxes.append(np.asarray(bbox, dtype=np.float32))
            bbx_xys_all.append(bbox_xywh_to_bbx_xys(bbox))
            kp2d_all.append(np.concatenate([kpts_raw, (vis_raw > 0).astype(np.float32)[:, None]], axis=1))
            K_fullimg_all.append(k4_to_K3(K4))

            img_rel = os.path.join("images", seq_name, img_filename).replace("\\", "/")
            img_paths.append(img_rel)
-
            cam_T_wc = build_T_wc(p_w, q_w)
-
-
-
-
-
-
            cam_T_wc_cv_all.append(cam_T_wc_cv)
            cam_T_w2c_cv_all.append(cam_T_w2c_cv)
            dpvo_poses.append(f"{p_w[0]} {p_w[1]} {p_w[2]} {q_w[0]} {q_w[1]} {q_w[2]} {q_w[3]}")
            dpvo_intrinsics.append(K4.astype(np.float32))

-
            if args.genmo and vit_model is not None:
                t0_vit = time.perf_counter()
-
-               img_tensor = _process_image_memory(img_bgr, bbox, img_size=256)
                vit_img_batch.append(img_tensor)

                if len(vit_img_batch) >= args.vit_batch_size:
                    batch_np = np.stack(vit_img_batch)
                    batch_t = torch.from_numpy(batch_np).to(device, non_blocking=True)
                    with torch.inference_mode():
                        with torch.amp.autocast("cuda"):
                            feats = vit_model({"img": batch_t})
                    all_vit_features.append(feats.detach().cpu())
                    vit_img_batch = []
                prof["vit_process"] += (time.perf_counter() - t0_vit)

-
            if args.vitpose and (idx in selected_set):
                t0_wr = time.perf_counter()
-
                cv2.imwrite(img_abs_path, img_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
                kpts_coco = []
                for k in range(17): kpts_coco.extend([float(kpts_raw[k, 0]), float(kpts_raw[k, 1]), int(vis_raw[k])])
                coco_subset.append(({"file_name": img_rel, "width": W, "height": H},
                                    {"category_id": 1, "bbox": bbox, "area": float(bbox[2]*bbox[3]), "iscrowd": 0, "keypoints": kpts_coco, "num_keypoints": int(np.sum(vis_raw > 0))}))
                prof["sparse_write"] += (time.perf_counter() - t0_wr)

-
            if args.debug and idx < debug_end_frame and smpl_renderer:
                t0_dbg = time.perf_counter()
                dbg = img_bgr.copy()
                try: draw_bbox_xywh_and_center(dbg, bbox)
                except: pass
                try:
                    rgb = smpl_renderer.render(dbg[:, :, ::-1].copy(), sd['go_c'], sd['bp'], sd['beta'], sd['tr_c'], K4[:2], K4[2:])
                    dbg = rgb[:, :, ::-1].copy()
                except: pass
                if not args.debug_no_coco:
                    draw_vis_text_and_points(dbg, kpts_raw, vis_raw)
                if vid_incam: vid_incam.write(dbg)
-
                if vid_global:
                    verts_w = smpl_renderer.get_verts(sd['go_w'], sd['bp'], sd['beta'], sd['tr_w']).float()
                    debug_global_verts_cpu.append(verts_w.detach().cpu())
                prof["debug_rend"] += (time.perf_counter() - t0_dbg)

-
        if args.genmo and len(vit_img_batch) > 0 and vit_model is not None:
            t0_vit = time.perf_counter()
            batch_np = np.stack(vit_img_batch)
            batch_t = torch.from_numpy(batch_np).to(device, non_blocking=True)
            with torch.inference_mode():
                with torch.amp.autocast("cuda"):
                    feats = vit_model({"img": batch_t})
            all_vit_features.append(feats.detach().cpu())
            prof["vit_process"] += (time.perf_counter() - t0_vit)

        prof["loop_total"] = time.perf_counter() - t_start_loop
        cap.release()
        if vid_incam: vid_incam.release()
-
-
        t0_dbg = time.perf_counter()
        if vid_global and len(debug_global_verts_cpu) > 0:
            try:
-
                from hmr4d.utils.geo.hmr_cam import create_camera_sensor
-
-
                verts_seq = torch.stack(debug_global_verts_cpu, dim=0)
                off = verts_seq[0].mean(0); off[1] = verts_seq[0, :, 1].min()
                verts_seq = verts_seq - off
-
                if global_J_reg is not None and verts_seq.shape[1] == global_J_reg.shape[-1]:
-
-
                sc, cx, cz = get_ground_params_from_points(roots, verts_seq)
                global_renderer.set_ground(sc * 1.5, cx, cz)
                col = torch.tensor([[0.0, 1.0, 0.0]], device=device)
                for i in range(len(verts_seq)):
                    cam = global_renderer.create_camera(g_R[i], g_T[i])
                    img = global_renderer.render_with_ground(verts_seq[i].to(device)[None], col, cam, g_L)
-
            except: pass
            vid_global.release()
        prof["debug_rend"] += (time.perf_counter() - t0_dbg)
-
        t0_save = time.perf_counter()
        if args.genmo:
            trans_w = np.stack(transl_w_all).astype(np.float32)
            world_off = trans_w[0].copy(); world_off[1] -= float(args.world_y_offset_m)
            trans_w_centered = trans_w - world_off[None]
            mats_w2c = np.stack(cam_T_w2c_cv_all).astype(np.float32)
            mats_wc = np.stack(cam_T_wc_cv_all).astype(np.float32)
            T_wp_w = np.eye(4, dtype=np.float32); T_wp_w[:3, 3] = world_off
            T_w_wp = np.eye(4, dtype=np.float32); T_w_wp[:3, 3] = -world_off
            mats_w2c_c = np.matmul(mats_w2c, T_wp_w[None])
            mats_wc_c = np.matmul(T_w_wp[None], mats_wc)
            cam_av, cam_tv = compute_velocity(mats_wc_c, fps=FPS)
-
-
            f_imgseq = torch.cat(all_vit_features, dim=0).float() if all_vit_features else torch.empty(0)

            g_dict = {
                "smpl_params_c": {"global_orient": torch.from_numpy(np.stack(global_orient_c_all)), "body_pose": torch.from_numpy(np.stack(body_pose_all)), "transl": torch.from_numpy(np.stack(transl_c_all)), "betas": torch.from_numpy(np.stack(betas_all))},
                "smpl_params_w": {"global_orient": torch.from_numpy(np.stack(global_orient_w_all)), "body_pose": torch.from_numpy(np.stack(body_pose_all)), "transl": torch.from_numpy(trans_w_centered), "betas": torch.from_numpy(np.stack(betas_all))},
                "T_w2c": torch.from_numpy(mats_w2c_c), "K_fullimg": torch.from_numpy(np.stack(K_fullimg_all)),
                "kp2d": torch.from_numpy(np.stack(kp2d_all)), "bbx_xys": torch.from_numpy(np.stack(bbx_xys_all)),
                "cam_angvel": torch.from_numpy(cam_av), "cam_tvel": torch.from_numpy(cam_tv),
                "imgname": img_paths, "valid_mask": torch.ones(len(img_paths), dtype=torch.float32),
                "world_offset": torch.from_numpy(world_off.astype(np.float32)),
-
            }
            torch.save(g_dict, genmo_out)

        if args.smplx:
            poses66 = np.concatenate([np.stack(global_orient_w_all), np.stack(body_pose_all)], axis=1)
            poses165 = np.pad(poses66, ((0,0),(0,99)), mode="constant").astype(np.float32)
            trans_w = np.stack(transl_w_all).astype(np.float32)
            world_off = trans_w[0].copy(); world_off[1] -= float(args.world_y_offset_m)
            trans_w = trans_w - world_off[None]
            np.savez(smplx_global_out, mocap_framerate=int(FPS), gender="neutral", betas=betas_all[0], trans=trans_w, poses=poses165, world_offset=world_off)

        if args.vitpose and coco_subset:
            with open(os.path.join(temp_ann_dir, f"{seq_name}.json"), "w") as f: json.dump(coco_subset, f)
-
        prof["save_files"] = time.perf_counter() - t0_save
        total_t = time.perf_counter() - t_start_seq
-
        print(f" > Done in {total_t:.2f}s | FPS: {num_frames/total_t:.1f}")
        print(f"   [Breakdown] BatchPrep: {prof['smpl_batch']:.2f}s | Init/Gap: {prof['prep']:.2f}s | Read: {prof['video_read']:.2f}s")
        print(f"               Overlay: {prof['overlay']:.2f}s | SparseWrite: {prof['sparse_write']:.2f}s | ViT: {prof['vit_process']:.2f}s")
        print(f"               DbgRend: {prof['debug_rend']:.2f}s | SaveFiles: {prof['save_files']:.2f}s")

    print("All sequences processed.")

if __name__ == "__main__":
|
| 879 |
-
|
|
|
|
|
|
| 1 |
import sys
import os
import json
import argparse
import numpy as np
import zlib
from glob import glob
from tqdm import tqdm
import cv2
import torch
from scipy.spatial.transform import Rotation as R
import time
import shutil
from pathlib import Path

# --- SETUP PATHS FOR IMPORTS ---
REPO_ROOT = Path(__file__).resolve().parents[2]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

try:
    from hmr4d.utils.preproc.vitfeat_extractor import Extractor
except Exception:  # downstream code checks `Extractor is not None`
    Extractor = None
from hmr4d.utils.pylogger import Log

# Force single thread
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
cv2.setNumThreads(0)
torch.set_num_threads(1)
FPS = 30.0
DEBUG_NUM_FRAMES = 5
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)


# --- HELPER FUNCTIONS (No Changes) ---
def _process_image_memory(img_bgr, bbox_xywh, img_size=256):
    if img_bgr is None: return np.zeros((3, img_size, img_size), dtype=np.float32)
    x, y, w, h = bbox_xywh
    cx, cy = x + w/2, y + h/2
    scale = max(w, h) * 1.2
    H, W = img_bgr.shape[:2]
    max_side = float(max(H, W, 1))
    if scale <= 1.0 or scale > max_side * 20.0: scale = max_side * 0.5
    half = scale / 2.0
    x0, y0 = int(cx - half), int(cy - half)
    x1, y1 = int(cx + half), int(cy + half)
    pad_l, pad_t = max(0, -x0), max(0, -y0)
    pad_r, pad_b = max(0, x1 - W), max(0, y1 - H)
    if max(pad_l, pad_t, pad_r, pad_b) > int(max_side * 4.0): return np.zeros((3, img_size, img_size), dtype=np.float32)
    if pad_l or pad_t or pad_r or pad_b:
        img_bgr = cv2.copyMakeBorder(img_bgr, pad_t, pad_b, pad_l, pad_r, cv2.BORDER_CONSTANT, value=(0,0,0))
        x0 += pad_l; y0 += pad_t; x1 += pad_l; y1 += pad_t
    crop = img_bgr[y0:y1, x0:x1]
    if crop.size == 0: return np.zeros((3, img_size, img_size), dtype=np.float32)
    if crop.shape[0] != img_size or crop.shape[1] != img_size:
        crop = cv2.resize(crop, (img_size, img_size), interpolation=cv2.INTER_LINEAR)
    crop = crop[:, :, ::-1].astype(np.float32) / 255.0
    crop = (crop - IMAGENET_MEAN) / IMAGENET_STD
    return crop.transpose(2, 0, 1)
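

# Illustrative sketch (not called by the pipeline): how a detector bbox turns into the
# 3x256x256 ImageNet-normalized CHW crop that the ViT consumes. The frame and bbox
# values below are made-up placeholders.
def _demo_process_image_memory():
    frame = np.zeros((720, 1280, 3), dtype=np.uint8)  # hypothetical BGR frame
    crop = _process_image_memory(frame, bbox_xywh=(500, 200, 180, 360), img_size=256)
    assert crop.shape == (3, 256, 256) and crop.dtype == np.float32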


def _alpha_blend_bgra_onto_bgr(dst_bgr, src_bgra, x, y):
    if dst_bgr is None or src_bgra is None: return dst_bgr
    H, W = dst_bgr.shape[:2]
    h, w = src_bgra.shape[:2]
    if w <= 0 or h <= 0: return dst_bgr
    x0, y0 = max(int(x), 0), max(int(y), 0)
    x1, y1 = min(int(x + w), W), min(int(y + h), H)
    if x1 <= x0 or y1 <= y0: return dst_bgr
    roi = dst_bgr[y0:y1, x0:x1]
    src_crop = src_bgra[(y0 - int(y)):(y0 - int(y)) + (y1 - y0), (x0 - int(x)):(x0 - int(x)) + (x1 - x0)]
    if src_crop.shape[2] == 3:
        roi[:] = src_crop
        return dst_bgr

    alpha = src_crop[:, :, 3].astype(np.uint16)
    inv_alpha = 255 - alpha
    b_src, g_src, r_src = src_crop[:, :, 0], src_crop[:, :, 1], src_crop[:, :, 2]
    b_dst, g_dst, r_dst = roi[:, :, 0], roi[:, :, 1], roi[:, :, 2]

    roi[:, :, 0] = ((b_src * alpha + b_dst * inv_alpha) >> 8).astype(np.uint8)
    roi[:, :, 1] = ((g_src * alpha + g_dst * inv_alpha) >> 8).astype(np.uint8)
    roi[:, :, 2] = ((r_src * alpha + r_dst * inv_alpha) >> 8).astype(np.uint8)
    return dst_bgr
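

# Note on the blend above: `>> 8` divides by 256 rather than 255, a common integer
# speed trick that darkens each channel by at most ~0.4%. A minimal standalone check
# with an assumed 2x2 half-transparent red patch (not used by the pipeline):
def _demo_alpha_blend():
    dst = np.full((4, 4, 3), 200, dtype=np.uint8)                 # gray background
    src = np.zeros((2, 2, 4), dtype=np.uint8); src[..., 2] = 255  # red BGRA patch...
    src[..., 3] = 128                                             # ...at ~50% alpha
    out = _alpha_blend_bgra_onto_bgr(dst, src, 1, 1)
    return out  # pixels in rows/cols 1..2 end up roughly halfway between red and gray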


def _find_ui_dir():
    cand = os.path.join(os.getcwd(), "UI")
    if os.path.isdir(cand): return cand
    return None  # Simplified for brevity


def _find_font_path(ui_dir, filename="Inter_18pt-Bold.ttf"):
    if not ui_dir: return None
    p = os.path.join(ui_dir, filename)
    return p if os.path.isfile(p) else None


def _load_ui_images(ui_dir):
    if not ui_dir or (not os.path.isdir(ui_dir)): return []
    imgs = []
    for name in sorted(os.listdir(ui_dir)):
        p = os.path.join(ui_dir, name)
        if not os.path.isfile(p): continue
        if name.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
            im = cv2.imread(p, cv2.IMREAD_UNCHANGED)
            if im is not None:
                if im.ndim == 2: im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
                imgs.append(im)
    return imgs


class SimpleUIOverlay:
    def __init__(self, width, height, seed=0, ui_dir=None, max_images=4, show_prob=0.6, min_hold_frames=20, max_hold_frames=120):
        self.W, self.H = int(width), int(height)
        self.rng = np.random.default_rng(int(seed))
        self.max_images = max(0, int(max_images))
        self.show_prob = float(show_prob)
        self.min_hold_frames, self.max_hold_frames = max(1, int(min_hold_frames)), max(1, int(max_hold_frames))
        self.ui_dir = ui_dir if ui_dir else _find_ui_dir()
        self.assets = _load_ui_images(self.ui_dir)
        self._ttl, self._active = 0, []

    def _pick_new_state(self):
        self._ttl = int(self.rng.integers(self.min_hold_frames, self.max_hold_frames + 1))
        self._active = []
        if (not self.assets) or (self.max_images <= 0): return
        if float(self.rng.random()) > self.show_prob: return
        k = min(int(self.rng.integers(1, self.max_images + 1)), len(self.assets))
        idxs = self.rng.choice(len(self.assets), size=k, replace=False)
        for idx in idxs:
            im = self.assets[int(idx)]
            h, w = im.shape[:2]
            if w > 0 and h > 0:
                x = int(self.rng.integers(-w // 4, max(1, self.W - (3 * w // 4))))
                y = int(self.rng.integers(-h // 4, max(1, self.H - (3 * h // 4))))
                self._active.append((im, x, y))

    def draw(self, img_bgr):
        if img_bgr is None: return img_bgr
        if self._ttl <= 0: self._pick_new_state()
        self._ttl -= 1
        for im, x, y in self._active: _alpha_blend_bgra_onto_bgr(img_bgr, im, x, y)
        return img_bgr


class SimpleChatOverlay:
    def __init__(self, width, height, seed=0, num_lines=7, region_w=420, region_h=180, margin=18, every_n_frames=15, corner=None, font_path=None):
        from collections import deque
        self.W, self.H = int(width), int(height)
        self.rng = np.random.default_rng(int(seed))
        self.num_lines, self.margin, self.every_n_frames = int(num_lines), int(margin), max(1, int(every_n_frames))
        self.region_w, self.region_h = int(region_w), int(region_h)
        self.font_path = font_path
        self._pil_fonts = {}
        self.corner = str(corner) if corner else str(self.rng.choice(["tl", "tr", "bl", "br"]))
        self.messages = deque(maxlen=self.num_lines)
        for _ in range(self.num_lines): self.messages.append(self._random_message())
        self._cached_overlay, self._dirty = None, True

    def _random_message(self):
        user = str(self.rng.choice(["nightbot", "viewer", "catjam", "shadow", "speedrunner", "chattycathy", "kappaking"]))
        if self.rng.random() < 0.5: user += str(self.rng.integers(10, 999))
        text = str(self.rng.choice(["pog", "lol", "gg", "nice", "W", "L", "no shot", "crazy", "clip it", "cooking", "unlucky"]))
        color = tuple(int(x) for x in self.rng.choice([(255, 120, 0), (0, 180, 255), (255, 0, 180), (0, 255, 120)]))
        return {"user": user, "text": text, "color": color}

    def _get_pil_font(self, size_px):
        if not self.font_path: return None
        if size_px in self._pil_fonts: return self._pil_fonts[size_px]
        try:
            from PIL import ImageFont
            font = ImageFont.truetype(self.font_path, size=max(1, size_px))
            self._pil_fonts[size_px] = font  # cache so repeated sizes don't reload the file
            return font
        except: return None

    def maybe_append(self, frame_idx):
        if int(frame_idx) % self.every_n_frames == 0:
            self.messages.append(self._random_message())
            self._dirty = True

    def _render_cache(self):
        rw = min(self.region_w, max(40, self.W - 2 * self.margin))
        rh = min(self.region_h, max(40, self.H - 2 * self.margin))
        pil_font = self._get_pil_font(int(round(np.clip(20.0 * (self.H / 720.0), 14.0, 30.0))))
        if pil_font is None:
            self._cached_overlay = None; return
        try:
            from PIL import Image, ImageDraw
            pil = Image.new("RGBA", (rw, rh), (0, 0, 0, 0))
            draw = ImageDraw.Draw(pil)
            line_h = max(14, int(round(float(getattr(pil_font, "size", 18)) * 1.25)))
            lines = list(self.messages)[-min(self.num_lines, max(1, rh // line_h)):]
            local_y = rh - line_h if self.corner in ("bl", "br") else 0
            for msg in lines:
                user = f"{msg['user']}: "
                draw.text((0, local_y), user, font=pil_font, fill=tuple(msg['color'][::-1]))
                tw = draw.textlength(user, font=pil_font)
                draw.text((tw, local_y), msg['text'], font=pil_font, fill=(240, 240, 240))
                local_y += (-line_h if self.corner in ("bl", "br") else line_h)
            self._cached_overlay = cv2.cvtColor(np.asarray(pil), cv2.COLOR_RGBA2BGRA)
        except: self._cached_overlay = None

    def draw(self, img_bgr):
        if img_bgr is None: return img_bgr
        if self._dirty: self._render_cache(); self._dirty = False
        if self._cached_overlay is not None:
            rw = min(self.region_w, max(40, self.W - 2 * self.margin))
            rh = min(self.region_h, max(40, self.H - 2 * self.margin))
            if self.corner == "tl": x, y = self.margin, self.margin
            elif self.corner == "tr": x, y = self.W - self.margin - rw, self.margin
            elif self.corner == "bl": x, y = self.margin, self.H - self.margin - rh
            else: x, y = self.W - self.margin - rw, self.H - self.margin - rh
            _alpha_blend_bgra_onto_bgr(img_bgr, self._cached_overlay, x, y)
        return img_bgr


def k4_to_K3(k4): return np.array([[k4[0], 0, k4[2]], [0, k4[1], k4[3]], [0, 0, 1]], dtype=np.float32)


def bbox_xywh_to_bbx_xys(bbox_xywh, base_enlarge=1.0):
    x, y, w, h = [float(v) for v in bbox_xywh]
    return np.array([x + 0.5 * w, y + 0.5 * h, max(w, h) * float(base_enlarge)], dtype=np.float32)


def clamp_bbox_xywh_to_image(bbox_xywh, W, H, min_size=1.0):
    x, y, w, h = [float(v) for v in bbox_xywh]
    W, H = float(W), float(H)
    if W <= 0 or H <= 0: return [0.0, 0.0, 0.0, 0.0]
    x2, y2 = x + w, y + h
    x1c = float(np.clip(x, 0.0, max(0.0, W - 1.0)))
    y1c = float(np.clip(y, 0.0, max(0.0, H - 1.0)))
    x2c = float(np.clip(x2, 0.0, W))
    y2c = float(np.clip(y2, 0.0, H))
    if x2c <= x1c: x2c = min(W, x1c + float(min_size))
    if y2c <= y1c: y2c = min(H, y1c + float(min_size))
    wc = max(0.0, x2c - x1c)
    hc = max(0.0, y2c - y1c)
    return [x1c, y1c, wc, hc]


def draw_bbox_xywh_and_center(img_bgr, bbox_xywh, color=(255, 255, 0)):
    x, y, w, h = [float(v) for v in bbox_xywh]
    cv2.rectangle(img_bgr, (int(x), int(y)), (int(x+w), int(y+h)), color, 2)
    cv2.circle(img_bgr, (int(x+w/2), int(y+h/2)), 4, (0, 0, 255), -1)


def vis_label_and_color(v: int):
    if v == 2: return "VIS", (0, 255, 0)
    if v == 1: return "OCC", (0, 165, 255)
    return "OFF", (160, 160, 160)


def draw_vis_text_and_points(img_bgr, kpts2d_xy, vis17):
    for k in range(17):
        v = int(vis17[k])
        label, color = vis_label_and_color(v)
        x, y = int(round(kpts2d_xy[k, 0])), int(round(kpts2d_xy[k, 1]))
        if v > 0: cv2.circle(img_bgr, (x, y), 4, color, -1)
        cv2.putText(img_bgr, f"{k}:{label}", (x + 6, y - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1, cv2.LINE_AA)


def build_T_wc(pos_world, quat_world_xyzw):
    T = np.eye(4, dtype=np.float64)
    T[:3, :3] = R.from_quat(np.asarray(quat_world_xyzw, dtype=np.float64)).as_matrix()
    T[:3, 3] = np.asarray(pos_world, dtype=np.float64)
    return T
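

# Minimal sketch (assumed values, not pipeline data): a camera-to-world pose built
# from a position and an xyzw quaternion, and its world-to-camera inverse.
def _demo_build_T_wc():
    T_wc = build_T_wc(pos_world=[1.0, 1.5, -2.0], quat_world_xyzw=[0.0, 0.0, 0.0, 1.0])
    T_cw = np.linalg.inv(T_wc)  # rigid, so this equals [R.T | -R.T @ t]
    p_cam = (T_cw @ np.array([1.0, 1.5, -2.0, 1.0]))[:3]  # camera center maps to origin
    assert np.allclose(p_cam, 0.0)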


def compute_velocity(mats, fps=30.0):
    # NOTE: returns per-frame deltas; `fps` is kept for API symmetry but the
    # outputs are not rescaled to per-second units.
    N = len(mats)
    if N < 2: return np.zeros((N, 3), dtype=np.float32), np.zeros((N, 3), dtype=np.float32)
    R_curr = mats[:, :3, :3]
    R_diff = np.matmul(R_curr[1:], np.transpose(R_curr[:-1], (0, 2, 1)))
    rv = R.from_matrix(R_diff).as_rotvec()
    angvel = np.zeros((N, 3), dtype=np.float32)
    angvel[1:] = rv
    t_curr = mats[:, :3, 3]
    tvel = np.zeros((N, 3), dtype=np.float32)
    tvel[1:] = t_curr[1:] - t_curr[:-1]
    return angvel.astype(np.float32), tvel.astype(np.float32)
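

# Illustrative check (not called anywhere): a constant 0.1 m/frame walk along +X with
# no rotation yields zero angular velocity and a constant translational delta.
def _demo_compute_velocity():
    mats = np.stack([np.eye(4, dtype=np.float32) for _ in range(5)])
    mats[:, 0, 3] = 0.1 * np.arange(5)
    angvel, tvel = compute_velocity(mats, fps=FPS)
    assert np.allclose(angvel, 0.0) and np.allclose(tvel[1:, 0], 0.1, atol=1e-6)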


def _compute_vitpose_selected_indices(num_frames, fps, bucket_seconds, frames_per_bucket, sampling="uniform", seed=123):
    if num_frames <= 0: return []
    rng = np.random.default_rng(int(seed))
    selected = []
    bucket_len = max(1, int(round(float(bucket_seconds) * float(fps))))
    b_start = 0
    while b_start < num_frames:
        b_end = min(num_frames, b_start + bucket_len)
        k = min(int(frames_per_bucket), b_end - b_start)
        if k > 0:
            if sampling == "random": idxs = np.sort(rng.choice(np.arange(b_start, b_end), size=k, replace=False)).tolist()
            elif sampling == "linspace": idxs = sorted(list(set(np.linspace(b_start, b_end - 1, k, dtype=int).tolist())))
            else:
                if k == 1: idxs = [b_start + (b_end - b_start) // 2]
                else: step = (b_end - b_start) // k; idxs = [min(b_start + i * step, b_end - 1) for i in range(k)]
            selected.extend(idxs)
        b_start = b_end
    return sorted(list(set(selected)))
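

# Illustrative numbers (assumed, not from a real run): at 30 fps with 12 s buckets,
# a 900-frame clip splits into buckets of 360 frames; 36 samples per bucket keeps
# roughly one frame in ten for the sparse VitPose annotation pass.
def _demo_vitpose_sampling():
    idxs = _compute_vitpose_selected_indices(900, 30.0, 12.0, 36, sampling="random", seed=123)
    assert len(idxs) <= 3 * 36 and idxs == sorted(set(idxs))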


_SMPLX_MODEL = None
_SMPLX_DEVICE = None


def _get_smplx_model(device):
    global _SMPLX_MODEL, _SMPLX_DEVICE
    if _SMPLX_MODEL is not None and _SMPLX_DEVICE == device: return _SMPLX_MODEL
    from hmr4d.utils.smplx_utils import make_smplx
    _SMPLX_MODEL = make_smplx("supermotion").to(device).eval()
    _SMPLX_DEVICE = device
    return _SMPLX_MODEL


class SmplIncamRenderer:
    def __init__(self, width, height, K4, device="cuda", smplx2smpl_path="hmr4d/utils/body_model/smplx2smpl_sparse.pt"):
        from hmr4d.utils.smplx_utils import make_smplx
        from hmr4d.utils.vis.renderer import Renderer
        self.torch = torch
        self.device = device
        self.smplx = make_smplx("supermotion").to(device).eval()
        self.smplx2smpl = None; self.faces = None
        try:
            self.smplx2smpl = torch.load(smplx2smpl_path).to(device)
            self.faces = make_smplx("smpl").faces
        except: self.faces = self.smplx.faces
        self.K_torch = torch.from_numpy(k4_to_K3(K4)).to(device)
        self.renderer = Renderer(width, height, device=device, faces=self.faces, K=self.K_torch)

    @torch.no_grad()
    def render(self, img_rgb_uint8, global_orient_aa, body_pose_aa, betas_10, transl_xyz, fl, pp):
        K3_torch = torch.from_numpy(np.array([[fl[0], 0, pp[0]], [0, fl[1], pp[1]], [0, 0, 1]], dtype=np.float32)).to(self.device)
        self.renderer.set_intrinsic(K3_torch)
        params = {
            "global_orient": torch.from_numpy(global_orient_aa[None]).float().to(self.device),
            "body_pose": torch.from_numpy(body_pose_aa[None]).float().to(self.device),
            "betas": torch.from_numpy(betas_10[None]).float().to(self.device),
            "transl": torch.from_numpy(transl_xyz[None]).float().to(self.device),
        }
        out = self.smplx(**params); verts = out.vertices[0]
        if self.smplx2smpl is not None and verts.dim() == 2: verts = torch.matmul(self.smplx2smpl, verts)
        img_out = self.renderer.render_mesh(verts, img_rgb_uint8, [0.8, 0.8, 0.8])
        return img_out

    @torch.no_grad()
    def get_verts(self, global_orient_aa, body_pose_aa, betas_10, transl_xyz):
        params = {
            "global_orient": torch.from_numpy(global_orient_aa[None]).float().to(self.device),
            "body_pose": torch.from_numpy(body_pose_aa[None]).float().to(self.device),
            "betas": torch.from_numpy(betas_10[None]).float().to(self.device),
            "transl": torch.from_numpy(transl_xyz[None]).float().to(self.device),
        }
        out = self.smplx(**params); verts = out.vertices[0]
        if self.smplx2smpl is not None and verts.dim() == 2: verts = torch.matmul(self.smplx2smpl, verts)
        return verts


def _as_betas10(betas_any) -> np.ndarray:
    betas = np.asarray(betas_any, dtype=np.float32).reshape(-1)
    betas10 = np.zeros(10, dtype=np.float32); n = min(10, betas.size)
    if n > 0: betas10[:n] = betas[:n]
    return betas10


def load_betas10_from_npz(npz_path, key="betas", index=None):
    with np.load(npz_path, allow_pickle=True) as data: arr = data[key]
    if arr.ndim == 0: arr = np.asarray(arr).reshape(1)
    if arr.ndim == 1: betas = arr
    elif arr.ndim == 2: row_idx = 0 if index is None else int(index); betas = arr[row_idx]
    else: raise ValueError(f"Bad betas shape: {arr.shape}")
    return _as_betas10(betas)


def _default_shape_npz_path() -> str: return os.path.join(os.path.dirname(__file__), "shape.npz")


def parse_smpl_inputs_from_row(row, override_betas10=None, keep_unity_scale=False, transl_source="pelvis", transl_y_offset_m=0.0):
    C = np.diag([1.0, -1.0, 1.0]).astype(np.float64)
    cam_rot_w_quat = np.array(row["cam_rot_world"], dtype=np.float64)
    R_cam_w = R.from_quat(cam_rot_w_quat).as_matrix()
    pel_rot_w_quat = np.array(row["pelvis_rot_world"], dtype=np.float64)
    R_pel_w = R.from_quat(pel_rot_w_quat).as_matrix()

    # Relative Rotation (Body to Camera)
    R_rel_unity = R_cam_w.T @ R_pel_w
    R_cv = C @ R_rel_unity @ C
    R_final = R_cv @ R.from_euler("z", 180, degrees=True).as_matrix()
    global_orient_aa = R.from_matrix(R_final).as_rotvec().astype(np.float32)

    smpl_scale = float(row.get("smpl_root_world_scale", 1.0))
    pelvis_cam_unity = np.asarray(row["smpl_incam_transl"], dtype=np.float64).reshape(3)
    root_cam_unity = np.asarray(row.get("smpl_root_incam_transl", [0.0, 0.0, 0.0]), dtype=np.float64).reshape(3)
    pelvis_cam_unity = pelvis_cam_unity + np.array([0.0, float(transl_y_offset_m), 0.0], dtype=np.float64)

    if str(transl_source).strip().lower() == "root": target_cam_unity = root_cam_unity
    else:
        if bool(keep_unity_scale): target_cam_unity = pelvis_cam_unity
        else:
            if abs(smpl_scale) > 1e-8: target_cam_unity = root_cam_unity + (pelvis_cam_unity - root_cam_unity) / smpl_scale
            else: target_cam_unity = pelvis_cam_unity
    target_cam_cv = (C @ target_cam_unity).astype(np.float64)

    pose = np.asarray(row["smplx_pose"], dtype=np.float32)
    body_pose = pose[3:66].astype(np.float32)
    betas10 = _as_betas10(override_betas10)

    return {
        "global_orient": global_orient_aa, "body_pose": body_pose, "betas": betas10,
        "target_cam_cv": target_cam_cv, "cam_rot_w_quat": cam_rot_w_quat,
        "cam_pos_world": np.asarray(row["cam_pos_world"], dtype=np.float64).reshape(3),
        "pelvis_pos_world": np.asarray(row["pelvis_pos_world"], dtype=np.float64).reshape(3),
        "smpl_scale": smpl_scale, "root_cam_unity": root_cam_unity
    }
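

# A small self-check of the handedness trick above (assumed values only): with
# C = diag(1, -1, 1), conjugating a rotation by C re-expresses it after flipping
# the Y axis, which is how the Unity pose lands in the OpenCV camera convention.
def _demo_unity_to_cv_rotation():
    C = np.diag([1.0, -1.0, 1.0])
    R_unity = R.from_euler("y", 90, degrees=True).as_matrix()
    R_cv = C @ R_unity @ C
    assert np.isclose(np.linalg.det(R_cv), 1.0)  # still a proper rotation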


def batch_smpl_forward(betas, global_orient, body_pose, device):
    model = _get_smplx_model(device)
    N = len(betas)
    chunk_size = 4096; pelvis_list = []
    with torch.no_grad():
        for i in range(0, N, chunk_size):
            b_betas = torch.from_numpy(betas[i:i+chunk_size]).float().to(device)
            b_go = torch.from_numpy(global_orient[i:i+chunk_size]).float().to(device)
            b_bp = torch.from_numpy(body_pose[i:i+chunk_size]).float().to(device)
            b_tr = torch.zeros((len(b_betas), 3), dtype=torch.float32, device=device)
            out = model(betas=b_betas, global_orient=b_go, body_pose=b_bp, transl=b_tr)
            pelvis_list.append(out.joints[:, 0, :].detach().cpu().numpy())
    return np.concatenate(pelvis_list, axis=0)
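

# Why the pelvis forward pass exists (sketch with made-up shapes; assumes the SMPL-X
# body model files are available): `transl` shifts the whole mesh, but the pelvis
# joint of the zero-translation body is not at the origin; it depends on betas and
# pose. To place the pelvis at a target point p, the pipeline therefore uses
# transl = p - pelvis0, with pelvis0 coming from this function.
def _demo_pelvis_offset(device="cpu"):
    betas = np.zeros((2, 10), dtype=np.float32)
    go = np.zeros((2, 3), dtype=np.float32)
    bp = np.zeros((2, 63), dtype=np.float32)
    pelvis0 = batch_smpl_forward(betas, go, bp, device=device)  # (2, 3)
    target = np.array([0.0, 0.0, 3.0], dtype=np.float32)
    transl = target - pelvis0[0]  # feeding this transl puts the pelvis at `target`
    return transl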


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--output", required=True)
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--vitpose", action="store_true")
    parser.add_argument("--genmo", action="store_true")
    parser.add_argument("--dpvo", action="store_true")
    parser.add_argument("--smplx", action="store_true")
    parser.add_argument("--debug_no_coco", action="store_true")
    parser.add_argument("--shape_npz", default=_default_shape_npz_path())
    parser.add_argument("--vitpose_use_all_frames", action="store_true")
    parser.add_argument("--vitpose_bucket_seconds", type=float, default=12.0)
    parser.add_argument("--vitpose_frames_per_bucket", type=int, default=36)
    parser.add_argument("--vitpose_sampling", type=str, default="random")
    parser.add_argument("--vitpose_seed", type=int, default=123)
    parser.add_argument("--ui_dir", type=str, default=None)
    parser.add_argument("--ui_show_prob", type=float, default=0.25)
    parser.add_argument("--ui_max_images", type=int, default=3)
    parser.add_argument("--ui_hold_min_s", type=float, default=0.7)
    parser.add_argument("--ui_hold_max_s", type=float, default=5.0)
    parser.add_argument("--ui_seed", type=int, default=None)
    parser.add_argument("--keep_unity_scale", action="store_true")
    parser.add_argument("--transl_source", type=str, default="pelvis")
    parser.add_argument("--transl_y_offset_m", type=float, default=-0.020)
    parser.add_argument("--world_y_offset_m", type=float, default=1.3415)
    parser.add_argument("--vit_batch_size", type=int, default=512, help="Batch size for in-memory ViT extraction")
    args = parser.parse_args()

    if not (args.vitpose or args.genmo or args.dpvo or args.smplx):
        args.vitpose = args.genmo = args.dpvo = args.smplx = True
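
    # Illustrative invocation (placeholder paths; the flags are the ones defined above):
    #   python third_party/GVHMR/tools/demo/process_dataset.py \
    #       --input /path/to/unity_export --output /path/to/processed --debug
    # Omitting every stage flag enables --vitpose --genmo --dpvo --smplx together,
    # per the default-all block above.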

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Running STREAMING processing on {device.upper()}...")

    vit_model = None
    if args.genmo and Extractor is not None:
        print("Initializing ViT Extractor (HMR2)...")
        extractor_wrapper = Extractor(tqdm_leave=False)
        vit_model = extractor_wrapper.extractor
        vit_model.eval()
        vit_model.to(device)

    override_betas10 = load_betas10_from_npz(args.shape_npz, key="betas")
    temp_ann_dir = os.path.join(args.output, "vitpose", "temp_annotations")
    os.makedirs(temp_ann_dir, exist_ok=True)
    jsonl_files = sorted(glob(os.path.join(args.input, "sequence_*.jsonl")))

    global_J_reg = None
    j_reg_path = "third_party/GVHMR/inputs/checkpoints/body_models/smpl_neutral_J_regressor.pt"
    if os.path.exists(j_reg_path) and device == "cuda":
        global_J_reg = torch.load(j_reg_path, map_location=device)

    for jsonl_idx, jsonl_path in enumerate(jsonl_files):
        seq_name = os.path.splitext(os.path.basename(jsonl_path))[0].replace("sequence_", "")
        print(f"[{jsonl_idx+1}/{len(jsonl_files)}] Processing {seq_name}...")

        prof = {"smpl_batch": 0.0, "video_read": 0.0, "overlay": 0.0, "vit_process": 0.0,
                "sparse_write": 0.0, "loop_total": 0.0, "save_files": 0.0, "debug_rend": 0.0, "prep": 0.0}

        t_start_seq = time.perf_counter()
        jsonl_dir = os.path.dirname(os.path.abspath(jsonl_path))
        video_path = os.path.join(jsonl_dir, f"video_{seq_name}.mp4")
        if not os.path.exists(video_path): video_path = os.path.join(jsonl_dir, "video.mp4")

        out_img_folder = os.path.join(args.output, "images", seq_name)
        os.makedirs(out_img_folder, exist_ok=True)

        with open(jsonl_path, "r") as f: lines = f.readlines()
        lines = lines[1:] if len(lines) > 0 else []
        num_frames = len(lines)
        if num_frames <= 0: continue

        genmo_out = os.path.join(args.output, "genmo_features", f"{seq_name}.pt")
        smplx_out = os.path.join(args.output, "smplx_incam", f"{seq_name}_smplx.npz")
        smplx_global_out = os.path.join(args.output, "smplx_global", f"{seq_name}_global.npz")
        dpvo_dir = os.path.join(args.output, "dpvo", seq_name)
        for p in [genmo_out, smplx_out, smplx_global_out, dpvo_dir]:
            if p: os.makedirs(os.path.dirname(p), exist_ok=True)

        selected_set = set()
        if args.vitpose:
            if args.vitpose_use_all_frames: selected_indices = list(range(num_frames))
            else:
                selected_indices = _compute_vitpose_selected_indices(
                    num_frames, FPS, args.vitpose_bucket_seconds,
                    args.vitpose_frames_per_bucket, args.vitpose_sampling, args.vitpose_seed
                )
            selected_set = set(selected_indices)

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened(): continue
        W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        resolved_ui_dir = args.ui_dir if args.ui_dir else _find_ui_dir()
        chat_font_path = _find_font_path(resolved_ui_dir)
        seq_seed = int(zlib.crc32(seq_name.encode("utf-8")) & 0xFFFFFFFF)
        chat_aug = SimpleChatOverlay(W, H, seed=seq_seed, num_lines=7, font_path=chat_font_path)
        ui_aug = SimpleUIOverlay(W, H, seed=((seq_seed ^ 0xA5A5A5A5) & 0xFFFFFFFF), ui_dir=resolved_ui_dir,
                                 max_images=args.ui_max_images, show_prob=args.ui_show_prob)

        # --- BATCH SMPL (GPU) ---
        t0_smpl = time.perf_counter()
        smpl_precalc_data = []
        debug_global_verts_cpu = []
        parsed_rows = []

        for line in lines:
            row = json.loads(line)
            parsed_rows.append(parse_smpl_inputs_from_row(row, override_betas10, args.keep_unity_scale, args.transl_source, args.transl_y_offset_m))

        all_betas = np.stack([d['betas'] for d in parsed_rows])
        all_go = np.stack([d['global_orient'] for d in parsed_rows])
        all_bp = np.stack([d['body_pose'] for d in parsed_rows])

        # We need an initial batch forward to get local pelvis offsets
        all_pelvis0 = batch_smpl_forward(all_betas, all_go, all_bp, device=device)

        C = np.diag([1.0, -1.0, 1.0]).astype(np.float64)
        C4 = np.diag([1.0, -1.0, 1.0, 1.0]).astype(np.float64)
        all_go_w, all_pelvis_pos_w_cv = [], []

        # --- FIX: DEFINE THE FIX ROTATION (Z-180) FOR WORLD ---
        fix_rot = R.from_euler("z", 180, degrees=True).as_matrix()
        fix_mat = np.eye(4, dtype=np.float64)
        fix_mat[:3, :3] = fix_rot
        # SMPLX-only adjustment: rotate the SMPLX global orientation in world by 180deg around Y.
        # (Do NOT touch camera/world transforms; this only changes the SMPL parameters.)
        # smplx_global_y180 = R.from_euler("y", 180, degrees=True).as_matrix()

        for i, d in enumerate(parsed_rows):
            # Unity Rotation (Raw)
            R_cam_w_unity = R.from_quat(d['cam_rot_w_quat']).as_matrix()

            # --- APPLY FIX HERE: Pre-multiply Camera Rot by Fix (Z-180) ---
            # This ensures the SMPL global orientation is calculated relative to the FIXED Camera
            R_cam_w_cv = fix_rot @ (C @ R_cam_w_unity @ C)

            R_pelvis_c_cv = R.from_rotvec(d['global_orient'].astype(np.float64)).as_matrix()
            R_pelvis_w_cv = R_cam_w_cv @ R_pelvis_c_cv
            all_go_w.append(R.from_matrix(R_pelvis_w_cv).as_rotvec().astype(np.float32))

            # Position Logic
            pelvis_pos_w_unity = d['pelvis_pos_world']
            root_pos_w_unity = (R_cam_w_unity @ d['root_cam_unity'] + d['cam_pos_world']).reshape(3)
            smpl_scale = d['smpl_scale']
            transl_source_local = str(args.transl_source).strip().lower()
            if transl_source_local == "root": target_pos_w_unity = root_pos_w_unity
            else:
                if bool(args.keep_unity_scale): target_pos_w_unity = pelvis_pos_w_unity
                else:
                    if abs(smpl_scale) > 1e-8: target_pos_w_unity = root_pos_w_unity + (pelvis_pos_w_unity - root_pos_w_unity) / smpl_scale
                    else: target_pos_w_unity = pelvis_pos_w_unity

            # --- APPLY FIX HERE: Pre-multiply Position by Fix (Z-180) ---
            pos_cv_raw = (C @ target_pos_w_unity).astype(np.float64)
            pelvis_pos_w_cv = fix_rot @ pos_cv_raw.reshape(3, 1)
            all_pelvis_pos_w_cv.append(pelvis_pos_w_cv.reshape(3))

        all_go_w = np.stack(all_go_w)
        # Apply the SMPLX-only world-space rotation to global_orient.
        # all_go_w = (
        #     R.from_matrix(smplx_global_y180 @ R.from_rotvec(all_go_w.astype(np.float64)).as_matrix())
        #     .as_rotvec()
        #     .astype(np.float32)
        # )

        # Compute World-Space Pelvis offsets (dependent on global orient)
        all_pelvis0_w = batch_smpl_forward(all_betas, all_go_w, all_bp, device=device)

        for i in range(num_frames):
            d = parsed_rows[i]
            # Incam Transl
            transl_c = (d['target_cam_cv'] - all_pelvis0[i]).astype(np.float32)

            # World Transl
            if str(args.transl_source) == "root": transl_w = all_pelvis_pos_w_cv[i].astype(np.float32)
            else: transl_w = (all_pelvis_pos_w_cv[i] - all_pelvis0_w[i]).astype(np.float32)

            smpl_precalc_data.append({
                "go_c": d['global_orient'], "bp": d['body_pose'], "beta": d['betas'], "tr_c": transl_c,
                "go_w": all_go_w[i], "tr_w": transl_w,
                "cam_rot_w_quat": d["cam_rot_w_quat"], "cam_pos_world": d["cam_pos_world"]
            })

        prof["smpl_batch"] = time.perf_counter() - t0_smpl

        # --- Debug Verification ---
        if args.debug:
            try:
                row0 = json.loads(lines[0])
                cam_pos0 = np.asarray(row0["cam_pos_world"], dtype=np.float64).reshape(3)
                cam_q0 = np.asarray(row0["cam_rot_world"], dtype=np.float64).reshape(4)
                pelvis_pos0 = np.asarray(row0["pelvis_pos_world"], dtype=np.float64).reshape(3)
                pelvis_cam_meta0 = np.asarray(row0.get("smpl_incam_transl", [0.0, 0.0, 0.0]), dtype=np.float64).reshape(3)

                R_cam_w0 = R.from_quat(cam_q0).as_matrix()
                pelvis_cam_est0 = (R_cam_w0.T @ (pelvis_pos0 - cam_pos0).reshape(3, 1)).reshape(3)
                diff0 = pelvis_cam_est0 - pelvis_cam_meta0

                Log.info(f"[Debug] {seq_name} pelvis_cam_unity check: diff={diff0.round(4)}")
            except Exception as e:
                Log.warning(f"[Debug] {seq_name} pelvis_cam_unity check failed: {e}")

        t0_gap = time.perf_counter()
        smpl_renderer = None
        vid_incam, vid_global = None, None
        debug_end_frame = min(num_frames, DEBUG_NUM_FRAMES)
        if args.debug:
            os.makedirs(os.path.join(args.output, "debug_renders"), exist_ok=True)
            if debug_end_frame > 0:
                try:
                    K4_init = np.asarray(json.loads(lines[0])["cam_intrinsics"], dtype=np.float32)
                    smpl_renderer = SmplIncamRenderer(W, H, K4_init, device=device)
                    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                    vid_incam = cv2.VideoWriter(os.path.join(args.output, "debug_renders", f"{seq_name}_incam.mp4"), fourcc, FPS, (W, H))
                    dbg_gw, dbg_gh = 960, 540
                    vid_global = cv2.VideoWriter(os.path.join(args.output, "debug_renders", f"{seq_name}_global.mp4"), fourcc, FPS, (dbg_gw, dbg_gh))
                except: pass

        # --- MAIN LOOP ---
        coco_subset, img_paths, K_fullimg_all = [], [], []
        cam_T_wc_cv_all, cam_T_w2c_cv_all = [], []
        dpvo_poses, dpvo_intrinsics = [], []
        bboxes, bbx_xys_all, kp2d_all = [], [], []
        global_orient_c_all, transl_c_all, body_pose_all, betas_all = [], [], [], []
        global_orient_w_all, transl_w_all = [], []
        vit_img_batch, all_vit_features = [], []

        ret, _ = cap.read()  # skip 0
        prof["prep"] = time.perf_counter() - t0_gap

        t_start_loop = time.perf_counter()
        for idx in tqdm(range(num_frames), desc="Frames", leave=False):
            t0_read = time.perf_counter()
            ret, img_bgr = cap.read()
            prof["video_read"] += (time.perf_counter() - t0_read)
            if not ret: break

            img_filename = f"img_{idx:05d}.jpg"
            img_abs_path = os.path.join(out_img_folder, img_filename)

            t0_ov = time.perf_counter()
            chat_aug.maybe_append(idx)
            chat_aug.draw(img_bgr)
            ui_aug.draw(img_bgr)
            prof["overlay"] += (time.perf_counter() - t0_ov)

            row = json.loads(lines[idx])
            K4 = np.asarray(row["cam_intrinsics"], dtype=np.float32)
            kpts_raw = np.asarray(row["kpts_2d"], dtype=np.float32).reshape(-1, 2)[:17]
            vis_raw = np.asarray(row["kpts_vis"], dtype=np.int32)[:17]
            if vis_raw.shape[0] >= 5: vis_raw[3] = 1; vis_raw[4] = 1
            bbox = clamp_bbox_xywh_to_image(row["bbox"], W, H)

            sd = smpl_precalc_data[idx]
            global_orient_c_all.append(sd['go_c'])
            transl_c_all.append(sd['tr_c'])
            global_orient_w_all.append(sd['go_w'])
            transl_w_all.append(sd['tr_w'])
            body_pose_all.append(sd['bp'])
            betas_all.append(sd['beta'])

            bboxes.append(np.asarray(bbox, dtype=np.float32))
            bbx_xys_all.append(bbox_xywh_to_bbx_xys(bbox))
            kp2d_all.append(np.concatenate([kpts_raw, (vis_raw > 0).astype(np.float32)[:, None]], axis=1))
            K_fullimg_all.append(k4_to_K3(K4))

            img_rel = os.path.join("images", seq_name, img_filename).replace("\\", "/")
            img_paths.append(img_rel)

            # Use raw Unity values
            p_w = np.asarray(sd["cam_pos_world"], dtype=np.float32)
            q_w = np.asarray(sd["cam_rot_w_quat"], dtype=np.float32)

            # 1. Build the Standard Unity-to-CV Matrix (C @ M @ C)
            cam_T_wc = build_T_wc(p_w, q_w)
            cam_T_wc_cv_raw = (C4 @ cam_T_wc @ C4)

            # 2. APPLY THE FIX (Z-180) to the Camera, matching precalc loop
            cam_T_wc_cv = (fix_mat @ cam_T_wc_cv_raw).astype(np.float32)

            # 3. Invert for W2C
            cam_T_w2c_cv = np.linalg.inv(cam_T_wc_cv)

            cam_T_wc_cv_all.append(cam_T_wc_cv)
            cam_T_w2c_cv_all.append(cam_T_w2c_cv)
            dpvo_poses.append(f"{p_w[0]} {p_w[1]} {p_w[2]} {q_w[0]} {q_w[1]} {q_w[2]} {q_w[3]}")
            dpvo_intrinsics.append(K4.astype(np.float32))
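
            # Sanity sketch for the three steps above (comments only; nothing here runs).
            # C4 = diag(1, -1, 1, 1) is the Y-flip change of basis in homogeneous
            # coordinates, so C4 @ T @ C4 re-expresses the Unity camera-to-world pose
            # in the OpenCV convention; fix_mat then applies the same Z-180 correction
            # used when the world-space SMPL orientations were precomputed. Because
            # cam_T_wc_cv is rigid, np.linalg.inv(cam_T_wc_cv) equals
            # [[R.T, -R.T @ t], [0, 1]], i.e. an exact world-to-camera transform.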

            if args.genmo and vit_model is not None:
                t0_vit = time.perf_counter()
                img_tensor = _process_image_memory(img_bgr, bbox, img_size=256)
                vit_img_batch.append(img_tensor)

                if len(vit_img_batch) >= args.vit_batch_size:
                    batch_np = np.stack(vit_img_batch)
                    batch_t = torch.from_numpy(batch_np).to(device, non_blocking=True)
                    with torch.inference_mode():
                        with torch.amp.autocast("cuda"):
                            feats = vit_model({"img": batch_t})
                    all_vit_features.append(feats.detach().cpu())
                    vit_img_batch = []
                prof["vit_process"] += (time.perf_counter() - t0_vit)

            if args.vitpose and (idx in selected_set):
                t0_wr = time.perf_counter()
                cv2.imwrite(img_abs_path, img_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
                kpts_coco = []
                for k in range(17): kpts_coco.extend([float(kpts_raw[k, 0]), float(kpts_raw[k, 1]), int(vis_raw[k])])
                coco_subset.append(({"file_name": img_rel, "width": W, "height": H},
                                    {"category_id": 1, "bbox": bbox, "area": float(bbox[2]*bbox[3]), "iscrowd": 0, "keypoints": kpts_coco, "num_keypoints": int(np.sum(vis_raw > 0))}))
                prof["sparse_write"] += (time.perf_counter() - t0_wr)

            if args.debug and idx < debug_end_frame and smpl_renderer:
                t0_dbg = time.perf_counter()
                dbg = img_bgr.copy()
                try: draw_bbox_xywh_and_center(dbg, bbox)
                except: pass
                try:
                    rgb = smpl_renderer.render(dbg[:, :, ::-1].copy(), sd['go_c'], sd['bp'], sd['beta'], sd['tr_c'], K4[:2], K4[2:])
                    dbg = rgb[:, :, ::-1].copy()
                except: pass
                if not args.debug_no_coco:
                    draw_vis_text_and_points(dbg, kpts_raw, vis_raw)
                if vid_incam: vid_incam.write(dbg)

                if vid_global:
                    verts_w = smpl_renderer.get_verts(sd['go_w'], sd['bp'], sd['beta'], sd['tr_w']).float()
                    debug_global_verts_cpu.append(verts_w.detach().cpu())
                prof["debug_rend"] += (time.perf_counter() - t0_dbg)

        if args.genmo and len(vit_img_batch) > 0 and vit_model is not None:
            t0_vit = time.perf_counter()
            batch_np = np.stack(vit_img_batch)
            batch_t = torch.from_numpy(batch_np).to(device, non_blocking=True)
            with torch.inference_mode():
                with torch.amp.autocast("cuda"):
                    feats = vit_model({"img": batch_t})
            all_vit_features.append(feats.detach().cpu())
            prof["vit_process"] += (time.perf_counter() - t0_vit)

        prof["loop_total"] = time.perf_counter() - t_start_loop
        cap.release()
        if vid_incam: vid_incam.release()
|
| 1435 |
+
|
| 1436 |
+
|
| 1437 |
+
|
| 1438 |
t0_dbg = time.perf_counter()
|
| 1439 |
+
|
| 1440 |
if vid_global and len(debug_global_verts_cpu) > 0:
|
| 1441 |
+
|
| 1442 |
try:
|
| 1443 |
+
|
| 1444 |
+
from hmr4d.utils.vis.renderer import (
|
| 1445 |
+
|
| 1446 |
+
Renderer,
|
| 1447 |
+
|
| 1448 |
+
get_global_cameras_static,
|
| 1449 |
+
|
| 1450 |
+
get_ground_params_from_points,
|
| 1451 |
+
|
| 1452 |
+
perspective_projection,
|
| 1453 |
+
|
| 1454 |
+
)
|
| 1455 |
+
|
| 1456 |
from hmr4d.utils.geo.hmr_cam import create_camera_sensor
|
| 1457 |
+
|
| 1458 |
+
|
| 1459 |
+
|
| 1460 |
+
dbg_gw, dbg_gh = 960, 540
|
| 1461 |
+
|
| 1462 |
+
_, _, K_global = create_camera_sensor(dbg_gw, dbg_gh, 24)
|
| 1463 |
+
|
| 1464 |
+
global_renderer = Renderer(dbg_gw, dbg_gh, device=device, faces=smpl_renderer.faces, K=K_global.to(device), bin_size=0)
|
| 1465 |
+
|
| 1466 |
verts_seq = torch.stack(debug_global_verts_cpu, dim=0)
|
| 1467 |
+
|
| 1468 |
off = verts_seq[0].mean(0); off[1] = verts_seq[0, :, 1].min()
|
| 1469 |
+
|
| 1470 |
verts_seq = verts_seq - off
|
| 1471 |
+
|
| 1472 |
+
|
| 1473 |
+
|
| 1474 |
+
# Convert CV-cam to GPU tensor for visualizer
|
| 1475 |
+
|
| 1476 |
+
cam_centers = None
|
| 1477 |
+
|
| 1478 |
+
try:
|
| 1479 |
+
|
| 1480 |
+
F = int(verts_seq.shape[0])
|
| 1481 |
+
|
| 1482 |
+
if len(cam_T_wc_cv_all) >= F:
|
| 1483 |
+
|
| 1484 |
+
cam_wc = np.stack(cam_T_wc_cv_all[:F], axis=0).astype(np.float32)
|
| 1485 |
+
|
| 1486 |
+
cam_centers = torch.from_numpy(cam_wc[:, :3, 3]).to(device=device)
|
| 1487 |
+
|
| 1488 |
+
cam_centers = cam_centers - off.to(device=device)[None]
|
| 1489 |
+
|
| 1490 |
+
except Exception:
|
| 1491 |
+
|
| 1492 |
+
cam_centers = None
|
| 1493 |
+
|
| 1494 |
+
|
| 1495 |
+
|
| 1496 |
+
g_R, g_T, g_L = get_global_cameras_static(
|
| 1497 |
+
|
| 1498 |
+
verts_seq, beta=2.0, cam_height_degree=20, target_center_height=1.0, device=device
|
| 1499 |
+
|
| 1500 |
+
)
|
| 1501 |
+
|
| 1502 |
+
|
| 1503 |
+
|
| 1504 |
if global_J_reg is not None and verts_seq.shape[1] == global_J_reg.shape[-1]:
|
| 1505 |
+
|
| 1506 |
+
joints_seq = torch.einsum("jv,fvk->fjk", global_J_reg.cpu(), verts_seq)
|
| 1507 |
+
|
| 1508 |
+
roots = joints_seq[:, 0]
|
| 1509 |
+
|
| 1510 |
+
else:
|
| 1511 |
+
|
| 1512 |
+
roots = verts_seq.mean(1)
|
| 1513 |
+
|
| 1514 |
sc, cx, cz = get_ground_params_from_points(roots, verts_seq)
|
| 1515 |
+
|
| 1516 |
global_renderer.set_ground(sc * 1.5, cx, cz)
|
| 1517 |
+
|
| 1518 |
col = torch.tensor([[0.0, 1.0, 0.0]], device=device)
|
| 1519 |
+
|
| 1520 |
+
trail = []
|
| 1521 |
+
|
| 1522 |
+
|
| 1523 |
+
|
| 1524 |
+
def _project_xy(points_w: torch.Tensor):
|
| 1525 |
+
|
| 1526 |
+
P = points_w.view(1, -1, 3)
|
| 1527 |
+
|
| 1528 |
+
x2d = perspective_projection(P, global_renderer.K, global_renderer.R, global_renderer.T.reshape(1, 3, 1))[0]
|
| 1529 |
+
|
| 1530 |
+
return x2d
|
| 1531 |
+
|
| 1532 |
+
|
| 1533 |
+
|
| 1534 |
+
def _draw_polyline(img_bgr, pts_xy, color, closed=False, thickness=1):
|
| 1535 |
+
|
| 1536 |
+
pts = np.asarray(pts_xy, dtype=np.int32).reshape(-1, 1, 2)
|
| 1537 |
+
|
| 1538 |
+
if len(pts) < 2: return
|
| 1539 |
+
|
| 1540 |
+
cv2.polylines(img_bgr, [pts], bool(closed), color, int(thickness), cv2.LINE_AA)
|
| 1541 |
+
|
| 1542 |
+
|
| 1543 |
+
|
| 1544 |
+
def _draw_camera_box_axes(img_bgr, C_w, right, up, fwd, scale=0.25):
|
| 1545 |
+
|
| 1546 |
+
C_w = C_w.reshape(3)
|
| 1547 |
+
|
| 1548 |
+
right = right.reshape(3)
|
| 1549 |
+
|
| 1550 |
+
up = up.reshape(3)
|
| 1551 |
+
|
| 1552 |
+
fwd = fwd.reshape(3)
|
| 1553 |
+
|
| 1554 |
+
L = float(scale)
|
| 1555 |
+
|
| 1556 |
+
|
| 1557 |
+
|
| 1558 |
+
# Draw Axis instead of just box (RGB = XYZ)
|
| 1559 |
+
|
| 1560 |
+
# X (Right) - Red
|
| 1561 |
+
|
| 1562 |
+
p_x = C_w + L * right
|
| 1563 |
+
|
| 1564 |
+
xy_x = _project_xy(torch.stack([C_w, p_x])).detach().cpu().numpy()
|
| 1565 |
+
|
| 1566 |
+
_draw_polyline(img_bgr, xy_x, (0, 0, 255), thickness=2)
|
| 1567 |
+
|
| 1568 |
+
|
| 1569 |
+
|
| 1570 |
+
# Y (Up/Down) - Green
|
| 1571 |
+
|
| 1572 |
+
p_y = C_w + L * up
|
| 1573 |
+
|
| 1574 |
+
xy_y = _project_xy(torch.stack([C_w, p_y])).detach().cpu().numpy()
|
| 1575 |
+
|
| 1576 |
+
_draw_polyline(img_bgr, xy_y, (0, 255, 0), thickness=2)
|
| 1577 |
+
|
| 1578 |
+
|
| 1579 |
+
|
| 1580 |
+
# Z (Fwd) - Blue
|
| 1581 |
+
|
| 1582 |
+
p_z = C_w + L * fwd
|
| 1583 |
+
|
| 1584 |
+
xy_z = _project_xy(torch.stack([C_w, p_z])).detach().cpu().numpy()
|
| 1585 |
+
|
| 1586 |
+
_draw_polyline(img_bgr, xy_z, (255, 0, 0), thickness=2)
|
| 1587 |
+
|
| 1588 |
+
|
| 1589 |
+
|
         for i in range(len(verts_seq)):
             cam = global_renderer.create_camera(g_R[i], g_T[i])
             img = global_renderer.render_with_ground(verts_seq[i].to(device)[None], col, cam, g_L)
+            img_bgr = img[:, :, ::-1].copy()
+
+            if cam_centers is not None and i < cam_centers.shape[0]:
+                try:
+                    # Blue ray: camera center -> SMPL root
+                    if i < roots.shape[0]:
+                        pts_line = torch.stack([cam_centers[i], roots[i].to(device=device)], dim=0)
+                        xy_line = _project_xy(pts_line).detach().cpu().numpy()
+                        _draw_polyline(img_bgr, xy_line, (255, 200, 50), closed=False, thickness=1)
+
+                    P = cam_centers[i].view(1, 3)
+                    x2d = _project_xy(P)[0]
+                    x, y = int(round(float(x2d[0].item()))), int(round(float(x2d[1].item())))
+                    if 0 <= x < img_bgr.shape[1] and 0 <= y < img_bgr.shape[0]:
+                        trail.append((x, y))
+                        cv2.circle(img_bgr, (x, y), 3, (0, 0, 255), -1)
+                        if len(trail) >= 2:
+                            cv2.polylines(img_bgr, [np.array(trail, dtype=np.int32)], False, (0, 0, 255), 1)
+
+                    if len(cam_T_wc_cv_all) > i:
+                        R_c2w = torch.from_numpy(np.asarray(cam_T_wc_cv_all[i], dtype=np.float32)[:3, :3]).to(device=device)
+                        C_w = cam_centers[i]
+                        right = R_c2w[:, 0]
+                        up = R_c2w[:, 1]
+                        fwd = R_c2w[:, 2]
+                        _draw_camera_box_axes(img_bgr, C_w, right, up, fwd, scale=0.35)
+                except Exception:
+                    pass
+
+            vid_global.write(img_bgr)
     except: pass
     vid_global.release()
     prof["debug_rend"] += (time.perf_counter() - t0_dbg)

     t0_save = time.perf_counter()
     if args.genmo:
         trans_w = np.stack(transl_w_all).astype(np.float32)
         world_off = trans_w[0].copy(); world_off[1] -= float(args.world_y_offset_m)
         trans_w_centered = trans_w - world_off[None]
         mats_w2c = np.stack(cam_T_w2c_cv_all).astype(np.float32)
         mats_wc = np.stack(cam_T_wc_cv_all).astype(np.float32)
         T_wp_w = np.eye(4, dtype=np.float32); T_wp_w[:3, 3] = world_off
         T_w_wp = np.eye(4, dtype=np.float32); T_w_wp[:3, 3] = -world_off
         mats_w2c_c = np.matmul(mats_w2c, T_wp_w[None])
         mats_wc_c = np.matmul(T_w_wp[None], mats_wc)
         cam_av, cam_tv = compute_velocity(mats_wc_c, fps=FPS)
         f_imgseq = torch.cat(all_vit_features, dim=0).float() if all_vit_features else torch.empty(0)

         g_dict = {
             "smpl_params_c": {"global_orient": torch.from_numpy(np.stack(global_orient_c_all)), "body_pose": torch.from_numpy(np.stack(body_pose_all)), "transl": torch.from_numpy(np.stack(transl_c_all)), "betas": torch.from_numpy(np.stack(betas_all))},
             "smpl_params_w": {"global_orient": torch.from_numpy(np.stack(global_orient_w_all)), "body_pose": torch.from_numpy(np.stack(body_pose_all)), "transl": torch.from_numpy(trans_w_centered), "betas": torch.from_numpy(np.stack(betas_all))},
             "T_w2c": torch.from_numpy(mats_w2c_c), "K_fullimg": torch.from_numpy(np.stack(K_fullimg_all)),
             "kp2d": torch.from_numpy(np.stack(kp2d_all)), "bbx_xys": torch.from_numpy(np.stack(bbx_xys_all)),
             "cam_angvel": torch.from_numpy(cam_av), "cam_tvel": torch.from_numpy(cam_tv),
             "imgname": img_paths, "valid_mask": torch.ones(len(img_paths), dtype=torch.float32),
             "world_offset": torch.from_numpy(world_off.astype(np.float32)),
+            "f_imgseq": f_imgseq
         }
         torch.save(g_dict, genmo_out)
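A minimal sketch of loading one of the `.pt` files saved above (the path is hypothetical; the keys and the re-centering convention follow `g_dict`):

import torch

d = torch.load("processed_dataset/genmo/seq_name.pt")  # hypothetical path
F = d["valid_mask"].shape[0]                           # number of frames
assert d["T_w2c"].shape == (F, 4, 4)                   # re-centered world->camera extrinsics
# transl was shifted by world_offset before saving; add it back to recover the raw world frame
transl_w_raw = d["smpl_params_w"]["transl"] + d["world_offset"]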

     if args.smplx:
         poses66 = np.concatenate([np.stack(global_orient_w_all), np.stack(body_pose_all)], axis=1)
         poses165 = np.pad(poses66, ((0,0),(0,99)), mode="constant").astype(np.float32)
         trans_w = np.stack(transl_w_all).astype(np.float32)
         world_off = trans_w[0].copy(); world_off[1] -= float(args.world_y_offset_m)
         trans_w = trans_w - world_off[None]
         np.savez(smplx_global_out, mocap_framerate=int(FPS), gender="neutral", betas=betas_all[0], trans=trans_w, poses=poses165, world_offset=world_off)

     if args.vitpose and coco_subset:
         with open(os.path.join(temp_ann_dir, f"{seq_name}.json"), "w") as f: json.dump(coco_subset, f)

     prof["save_files"] = time.perf_counter() - t0_save
     total_t = time.perf_counter() - t_start_seq

     print(f" > Done in {total_t:.2f}s | FPS: {num_frames/total_t:.1f}")
     print(f" [Breakdown] BatchPrep: {prof['smpl_batch']:.2f}s | Init/Gap: {prof['prep']:.2f}s | Read: {prof['video_read']:.2f}s")
     print(f" Overlay: {prof['overlay']:.2f}s | SparseWrite: {prof['sparse_write']:.2f}s | ViT: {prof['vit_process']:.2f}s")
     print(f" DbgRend: {prof['debug_rend']:.2f}s | SaveFiles: {prof['save_files']:.2f}s")

 print("All sequences processed.")

 if __name__ == "__main__":
+    main()
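And a matching sketch for the AMASS-style npz written by the `args.smplx` branch (path hypothetical; the 165-dim layout follows `poses165` above: 3 global-orient plus 63 body axis-angle values, zero-padded to 165):

import numpy as np

d = np.load("processed_dataset/smplx/seq_name.npz")  # hypothetical path
poses = d["poses"]                                   # (F, 165), zero-padded from 66
root_aa = poses[:, :3]                               # global orientation, axis-angle
body_aa = poses[:, 3:66]                             # 21 SMPL body joints x 3
trans = d["trans"] + d["world_offset"]               # undo the re-centering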
train.log
CHANGED
@@ -3197,3 +3197,635 @@ full_key: dataset_opts.train.unity
 [12/30 22:16:05][INFO] [UnityDataset] Found 5 sequences.
 [12/30 22:16:05][INFO] [Val Dataset][7/7]: name=unity_val, size=5, genmo.datasets.unity_dataset.UnityDataset
 [12/30 22:16:05][INFO]
+[12/30 22:26:06][INFO] [Exp Name]: finetune_
+[12/30 22:26:06][INFO] [GPU x Batch] = 1 x 1
+[12/30 22:26:06][INFO] [UnityDataset] Found 5 sequences.
+[12/30 22:26:06][INFO] [Train Dataset][9/9]: name=unity, size=5, genmo.datasets.unity_dataset.UnityDataset
+[12/30 22:26:06][INFO] [Train Dataset][All]: ConcatDataset size=5
+[12/30 22:26:06][INFO]
+[12/30 22:26:06][INFO] [UnityDataset] Found 5 sequences.
+[12/30 22:26:06][INFO] [Val Dataset][7/7]: name=unity_val, size=5, genmo.datasets.unity_dataset.UnityDataset
+[12/30 22:26:06][INFO]
+[12/30 22:26:11][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 22:26:42][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_3/checkpoints'
+[12/30 22:26:54][INFO] Start Fitting...
+[12/30 22:26:56][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 22:26:56][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 22:26:56][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 22:26:56][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 22:27:28][INFO] [Exp Name]: finetune_
+[12/30 22:27:28][INFO] [GPU x Batch] = 1 x 1
+[12/30 22:27:28][INFO] [UnityDataset] Found 5 sequences.
+[12/30 22:27:28][INFO] [Train Dataset][9/9]: name=unity, size=5, genmo.datasets.unity_dataset.UnityDataset
+[12/30 22:27:28][INFO] [Train Dataset][All]: ConcatDataset size=5
+[12/30 22:27:28][INFO]
+[12/30 22:27:28][INFO] [UnityDataset] Found 5 sequences.
+[12/30 22:27:28][INFO] [Val Dataset][7/7]: name=unity_val, size=5, genmo.datasets.unity_dataset.UnityDataset
+[12/30 22:27:28][INFO]
+[12/30 22:27:37][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 22:27:56][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_4/checkpoints'
+[12/30 22:28:08][INFO] Start Fitting...
+[12/30 22:28:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 22:28:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 22:28:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 22:28:11][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 22:29:56][INFO] [Exp Name]: finetune_
+[12/30 22:29:56][INFO] [GPU x Batch] = 1 x 1
+[12/30 22:29:56][INFO] [UnityDataset] Found 2 sequences.
+[12/30 22:29:56][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 22:29:56][INFO] [Train Dataset][All]: ConcatDataset size=2
+[12/30 22:29:56][INFO]
+[12/30 22:29:56][INFO] [UnityDataset] Found 2 sequences.
+[12/30 22:29:56][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 22:29:56][INFO]
+[12/30 22:30:02][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 22:30:17][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_5/checkpoints'
+[12/30 22:30:30][INFO] Start Fitting...
+[12/30 22:30:31][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 22:30:31][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 22:30:31][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 22:30:31][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 22:56:38][INFO] [Exp Name]: finetune_
+[12/30 22:56:38][INFO] [GPU x Batch] = 1 x 1
+[12/30 22:56:38][INFO] [UnityDataset] Found 6 sequences.
+[12/30 22:56:38][INFO] [Train Dataset][9/9]: name=unity, size=6, genmo.datasets.unity_dataset.UnityDataset
+[12/30 22:56:38][INFO] [Train Dataset][All]: ConcatDataset size=6
+[12/30 22:56:38][INFO]
+[12/30 22:56:38][INFO] [UnityDataset] Found 6 sequences.
+[12/30 22:56:38][INFO] [Val Dataset][7/7]: name=unity_val, size=6, genmo.datasets.unity_dataset.UnityDataset
+[12/30 22:56:38][INFO]
+[12/30 22:56:44][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 22:57:07][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_6/checkpoints'
+[12/30 22:57:27][INFO] Start Fitting...
+[12/30 22:57:30][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 22:57:30][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 22:57:31][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 22:57:31][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 22:57:34][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return F.conv1d(input, weight, bias, self.stride,
+
+[12/30 22:57:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+[12/30 22:57:40][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+[12/30 22:57:47][WARNING] [VisUnityVal] Failed to read image: third_party/GVHMR/processed_dataset/images/0_biboo_birthday_speech/img_00699.jpg
+[12/30 22:58:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 22:58:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 22:58:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 22:58:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 22:58:14][INFO] ✅[FIT][Epoch 0] finished! 00:46→03:06 | loss_epoch=28
+[12/30 23:01:18][INFO] [Exp Name]: finetune_
+[12/30 23:01:18][INFO] [GPU x Batch] = 1 x 1
+[12/30 23:01:18][INFO] [UnityDataset] Found 6 sequences.
+[12/30 23:01:18][INFO] [Train Dataset][9/9]: name=unity, size=6, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:01:18][INFO] [Train Dataset][All]: ConcatDataset size=6
+[12/30 23:01:18][INFO]
+[12/30 23:01:18][INFO] [UnityDataset] Found 6 sequences.
+[12/30 23:01:18][INFO] [Val Dataset][7/7]: name=unity_val, size=6, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:01:18][INFO]
+[12/30 23:01:26][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 23:01:45][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_0/checkpoints'
+[12/30 23:01:57][INFO] Start Fitting...
+[12/30 23:01:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 23:01:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:01:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:01:59][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 23:02:01][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return F.conv1d(input, weight, bias, self.stride,
+
+[12/30 23:02:03][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+[12/30 23:02:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+[12/30 23:02:13][WARNING] [VisUnityVal] Failed to read image: third_party/GVHMR/processed_dataset/images/0_biboo_birthday_speech/img_00699.jpg
+[12/30 23:02:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:02:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:02:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:02:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:02:41][INFO] ✅[FIT][Epoch 0] finished! 00:43→02:53 | loss_epoch=28
+[12/30 23:09:40][INFO] [Exp Name]: finetune_
+[12/30 23:09:40][INFO] [GPU x Batch] = 1 x 1
+[12/30 23:09:41][INFO] [UnityDataset] Found 6 sequences.
+[12/30 23:09:41][INFO] [Train Dataset][9/9]: name=unity, size=6, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:09:41][INFO] [Train Dataset][All]: ConcatDataset size=6
+[12/30 23:09:41][INFO]
+[12/30 23:09:41][INFO] [UnityDataset] Found 6 sequences.
+[12/30 23:09:41][INFO] [Val Dataset][7/7]: name=unity_val, size=6, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:09:41][INFO]
+[12/30 23:09:49][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 23:10:08][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_1/checkpoints'
+[12/30 23:10:17][INFO] Start Fitting...
+[12/30 23:10:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 23:10:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:10:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:10:18][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 23:10:19][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return F.conv1d(input, weight, bias, self.stride,
+
+[12/30 23:10:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+[12/30 23:10:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+[12/30 23:10:44][INFO] [Exp Name]: finetune_
+[12/30 23:10:44][INFO] [GPU x Batch] = 1 x 1
+[12/30 23:10:44][INFO] [UnityDataset] Found 6 sequences.
+[12/30 23:10:44][INFO] [Train Dataset][9/9]: name=unity, size=6, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:10:44][INFO] [Train Dataset][All]: ConcatDataset size=6
+[12/30 23:10:44][INFO]
+[12/30 23:10:44][INFO] [UnityDataset] Found 6 sequences.
+[12/30 23:10:44][INFO] [Val Dataset][7/7]: name=unity_val, size=6, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:10:44][INFO]
+[12/30 23:10:52][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 23:11:04][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_2/checkpoints'
+[12/30 23:11:11][INFO] Start Fitting...
+[12/30 23:11:13][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 23:11:13][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:11:13][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:11:13][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 23:11:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return F.conv1d(input, weight, bias, self.stride,
+
+[12/30 23:11:15][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+[12/30 23:11:19][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+[12/30 23:11:27][WARNING] [VisUnityVal] Failed to read image: third_party/GVHMR/processed_dataset/images/0_biboo_birthday_speech/img_00699.jpg
+[12/30 23:11:53][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:11:53][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:11:53][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:11:53][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:11:53][INFO] ✅[FIT][Epoch 0] finished! 00:40→02:43 | loss_epoch=28
+[12/30 23:29:33][INFO] [Exp Name]: finetune_
+[12/30 23:29:33][INFO] [GPU x Batch] = 1 x 1
+[12/30 23:29:33][INFO] [UnityDataset] Found 2 sequences.
+[12/30 23:29:33][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:29:33][INFO] [Train Dataset][All]: ConcatDataset size=2
+[12/30 23:29:33][INFO]
+[12/30 23:29:33][INFO] [UnityDataset] Found 2 sequences.
+[12/30 23:29:33][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:29:33][INFO]
+[12/30 23:29:39][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 23:30:02][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_3/checkpoints'
+[12/30 23:30:13][INFO] Start Fitting...
+[12/30 23:30:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 23:30:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:30:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:30:14][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 23:30:15][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return F.conv1d(input, weight, bias, self.stride,
+
+[12/30 23:30:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+[12/30 23:30:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+[12/30 23:30:30][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9930 pred=+0.9643 delta(pred-gt)=-0.0287
+[12/30 23:30:30][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03876931 -0.17480041 0.02509396] global_orient0_aa(pred)=[-0.1090048 -1.7763788 -0.15125035]
+[12/30 23:30:30][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-9.99,+2.34,+1.24) pred=(-101.99,-9.31,-0.53) pred_vs_gt=(-91.73,-11.16,-3.80)
+[12/30 23:30:30][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+94.23
+[12/30 23:46:28][INFO] [Exp Name]: finetune_
+[12/30 23:46:28][INFO] [GPU x Batch] = 1 x 1
+[12/30 23:46:28][INFO] [UnityDataset] Found 2 sequences.
+[12/30 23:46:28][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:46:28][INFO] [Train Dataset][All]: ConcatDataset size=2
+[12/30 23:46:28][INFO]
+[12/30 23:46:28][INFO] [UnityDataset] Found 2 sequences.
+[12/30 23:46:28][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:46:28][INFO]
+[12/30 23:46:34][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 23:46:54][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_4/checkpoints'
+[12/30 23:47:05][INFO] Start Fitting...
+[12/30 23:47:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 23:47:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:47:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:47:07][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 23:47:09][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return F.conv1d(input, weight, bias, self.stride,
+
+[12/30 23:47:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+[12/30 23:47:12][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+[12/30 23:47:26][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9930 pred=+0.9643 delta(pred-gt)=-0.0287
+[12/30 23:47:26][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03876931 -0.17480041 0.02509396] global_orient0_aa(pred)=[-0.1090048 -1.7763788 -0.15125035]
+[12/30 23:47:26][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-9.99,+2.34,+1.24) pred=(-101.99,-9.31,-0.53) pred_vs_gt=(-91.73,-11.16,-3.80)
+[12/30 23:47:26][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+94.23
+[12/30 23:51:42][INFO] [Exp Name]: finetune_
+[12/30 23:51:42][INFO] [GPU x Batch] = 1 x 1
+[12/30 23:51:42][INFO] [UnityDataset] Found 2 sequences.
+[12/30 23:51:42][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:51:42][INFO] [Train Dataset][All]: ConcatDataset size=2
+[12/30 23:51:42][INFO]
+[12/30 23:51:42][INFO] [UnityDataset] Found 2 sequences.
+[12/30 23:51:42][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:51:42][INFO]
+[12/30 23:51:48][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 23:52:04][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_5/checkpoints'
+[12/30 23:52:15][INFO] Start Fitting...
+[12/30 23:52:16][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 23:52:16][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:52:16][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:52:16][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 23:52:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return F.conv1d(input, weight, bias, self.stride,
+
+[12/30 23:52:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+[12/30 23:52:21][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+[12/30 23:52:35][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9930 pred=+0.9643 delta(pred-gt)=-0.0287
+[12/30 23:52:35][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03876931 -0.17480041 0.02509396] global_orient0_aa(pred)=[-0.1090048 -1.7763788 -0.15125035]
+[12/30 23:52:35][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-9.99,+2.34,+1.24) pred=(-101.99,-9.31,-0.53) pred_vs_gt=(-91.73,-11.16,-3.80)
+[12/30 23:52:35][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+94.23
+[12/30 23:53:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:53:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:53:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:53:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:53:24][INFO] ✅[FIT][Epoch 0] finished! 01:08→04:34 | loss_epoch=24.5
+[12/30 23:55:59][INFO] [Exp Name]: finetune_
+[12/30 23:55:59][INFO] [GPU x Batch] = 1 x 1
+[12/30 23:55:59][INFO] [UnityDataset] Found 2 sequences.
+[12/30 23:55:59][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:55:59][INFO] [Train Dataset][All]: ConcatDataset size=2
+[12/30 23:55:59][INFO]
+[12/30 23:55:59][INFO] [UnityDataset] Found 2 sequences.
+[12/30 23:55:59][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
+[12/30 23:55:59][INFO]
+[12/30 23:56:06][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+[12/30 23:56:23][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_6/checkpoints'
+[12/30 23:56:35][INFO] Start Fitting...
+[12/30 23:56:37][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+[12/30 23:56:37][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:56:37][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+[12/30 23:56:37][INFO] 🚀[FIT][Epoch 0] Data: unity Experiment: finetune_
+[12/30 23:56:39][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return F.conv1d(input, weight, bias, self.stride,
+
+[12/30 23:56:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+[12/30 23:56:42][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+[12/30 23:56:54][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9930 pred=+0.9643 delta(pred-gt)=-0.0287
+[12/30 23:56:54][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03876931 -0.17480041 0.02509396] global_orient0_aa(pred)=[-0.1090048 -1.7763788 -0.15125035]
+[12/30 23:56:54][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-9.99,+2.34,+1.24) pred=(-101.99,-9.31,-0.53) pred_vs_gt=(-91.73,-11.16,-3.80)
+[12/30 23:56:54][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+94.23
+[12/30 23:57:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:57:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:57:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:57:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+[12/30 23:57:45][INFO] ✅[FIT][Epoch 0] finished! 01:09→04:38 | loss_epoch=24.5
| 3545 |
+
[12/30 23:58:35][INFO] [Exp Name]: finetune_
[12/30 23:58:35][INFO] [GPU x Batch] = 1 x 1
[12/30 23:58:35][INFO] [UnityDataset] Found 1 sequences.
[12/30 23:58:35][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/30 23:58:35][INFO] [Train Dataset][All]: ConcatDataset size=1
[12/30 23:58:35][INFO]
[12/30 23:58:35][INFO] [UnityDataset] Found 1 sequences.
[12/30 23:58:35][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/30 23:58:35][INFO]
[12/30 23:58:44][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
[12/30 23:59:06][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_7/checkpoints'
[12/30 23:59:18][INFO] Start Fitting...
[12/30 23:59:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
[12/30 23:59:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/30 23:59:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/30 23:59:20][INFO] [FIT][Epoch 0] Data: unity Experiment: finetune_
[12/30 23:59:22][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return F.conv1d(input, weight, bias, self.stride,
[12/30 23:59:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
[12/30 23:59:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
[12/30 23:59:36][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 root_y0: gt=+0.9944 pred=+0.9685 delta(pred-gt)=-0.0259
[12/30 23:59:36][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 global_orient0_aa(gt)=[0.02056097 0.18737577 0.01068786] global_orient0_aa(pred)=[ 0.0337113 -2.8594027 -0.01747983]
[12/30 23:59:36][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 global_orient0_yxz_deg gt=(+10.74,+1.11,+0.72) pred=(-163.84,-0.50,-1.42) pred_vs_gt=(-174.54,-1.98,-1.80)
[12/30 23:59:36][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 yaw0_deg(pred_vs_gt)=+174.27
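Two of the warnings in the run above recur in every run below and deserve a note. The dataloader one suggests `num_workers=11`; a sketch of that change (the `TensorDataset` is a stand-in for the repo's `UnityDataset`):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    dataset = TensorDataset(torch.zeros(3, 1))  # stand-in dataset
    # persistent_workers avoids re-forking workers every epoch; both knobs
    # only pay off for datasets much larger than the 1-3 sequences used here.
    loader = DataLoader(dataset, batch_size=1, num_workers=11,
                        pin_memory=True, persistent_workers=True)

The `lr_scheduler.step()`-before-`optimizer.step()` warning is most likely a 16-mixed artifact rather than a code-order bug: the AMP grad scaler can skip the first optimizer step while calibrating its loss scale, which makes the scheduler appear to step first. It is usually benign this early in training.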
[12/31 02:50:01][INFO] [Exp Name]: finetune_
[12/31 02:50:01][INFO] [GPU x Batch] = 1 x 1
[12/31 02:50:01][INFO] [UnityDataset] Found 1 sequences.
[12/31 02:50:01][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 02:50:01][INFO] [Train Dataset][All]: ConcatDataset size=1
[12/31 02:50:01][INFO]
[12/31 02:50:01][INFO] [UnityDataset] Found 1 sequences.
[12/31 02:50:01][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 02:50:01][INFO]
[12/31 02:50:07][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
[12/31 02:50:28][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_8/checkpoints'
[12/31 02:50:41][INFO] Start Fitting...
[12/31 02:50:42][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
[12/31 02:50:42][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 02:50:42][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 02:50:42][INFO] [FIT][Epoch 0] Data: unity Experiment: finetune_
[12/31 02:50:43][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return F.conv1d(input, weight, bias, self.stride,
[12/31 02:50:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
[12/31 02:50:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
[12/31 02:50:55][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9875 pred=+0.9726 delta(pred-gt)=-0.0149
[12/31 02:50:55][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.01689476 -0.20703591 0.01797612] global_orient0_aa(pred)=[-0.0321125 -2.8486555 -0.07525362]
[12/31 02:50:55][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-11.85,+1.07,+0.92) pred=(-163.30,-3.15,+0.83) pred_vs_gt=(-151.41,-4.11,-0.96)
[12/31 02:50:55][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+151.99
[12/31 02:51:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 02:51:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 02:51:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 02:51:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 02:51:41][INFO] [FIT][Epoch 0] finished! 01:00→04:01 | loss_epoch=12.6
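The `yaw0_deg(pred_vs_gt)` figure can be cross-checked from the two frame-0 `global_orient` axis-angle vectors logged above. A sketch using SciPy with this run's numbers; the intrinsic Y-X-Z order matches the `yxz_deg` naming, but the composition order and sign convention of the callback are assumptions:

    import numpy as np
    from scipy.spatial.transform import Rotation as R

    aa_gt = np.array([0.01689476, -0.20703591, 0.01797612])    # global_orient0_aa(gt)
    aa_pred = np.array([-0.0321125, -2.8486555, -0.07525362])  # global_orient0_aa(pred)

    # Relative rotation of pred w.r.t. gt, read off as intrinsic Y-X-Z Euler angles.
    rel = R.from_rotvec(aa_pred) * R.from_rotvec(aa_gt).inv()
    yaw, pitch, roll = rel.as_euler("YXZ", degrees=True)
    print(f"yaw(pred_vs_gt) = {yaw:+.2f} deg")
    # ~ -151.4 deg, matching the pred_vs_gt triple in the log; the separately
    # logged yaw0_deg (+151.99) evidently uses the opposite sign convention.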
[12/31 03:10:22][INFO] [Exp Name]: finetune_
[12/31 03:10:22][INFO] [GPU x Batch] = 1 x 1
[12/31 03:10:22][INFO] [UnityDataset] Found 1 sequences.
[12/31 03:10:22][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 03:10:22][INFO] [Train Dataset][All]: ConcatDataset size=1
[12/31 03:10:22][INFO]
[12/31 03:10:22][INFO] [UnityDataset] Found 1 sequences.
[12/31 03:10:22][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 03:10:22][INFO]
[12/31 03:10:28][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
[12/31 03:10:51][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_9/checkpoints'
[12/31 03:11:01][INFO] Start Fitting...
[12/31 03:11:03][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
[12/31 03:11:03][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 03:11:03][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 03:11:03][INFO] [FIT][Epoch 0] Data: unity Experiment: finetune_
[12/31 03:11:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return F.conv1d(input, weight, bias, self.stride,
[12/31 03:11:05][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
[12/31 03:11:05][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
[12/31 03:11:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9875 pred=+0.9726 delta(pred-gt)=-0.0149
[12/31 03:11:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.02646996 2.9343371 -0.02487765] global_orient0_aa(pred)=[-0.03202499 -2.848779 -0.0755955 ]
[12/31 03:11:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(+168.15,+1.07,+0.92) pred=(-163.31,-3.16,+0.82) pred_vs_gt=(+28.58,+4.12,+0.97)
[12/31 03:11:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=-28.01
[12/31 03:12:02][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 03:12:02][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 03:12:02][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 03:12:02][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 03:12:02][INFO] [FIT][Epoch 0] finished! 01:00→04:01 | loss_epoch=14.2
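Between version_8 and version_9 the logged ground truth for the same sequence (`e000_0_biboo_birthday_speech`) changes: `global_orient0_aa(gt)` flips from roughly -0.21 rad about Y (yaw -11.85°) to +2.93 rad (yaw +168.15°) while the prediction is essentially unchanged, and the yaw error drops from +151.99° to -28.01°. This is consistent with a GT-orientation convention change in the data pipeline between the two runs, not with a model improvement.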
[12/31 03:16:57][INFO] [Exp Name]: finetune_
[12/31 03:16:57][INFO] [GPU x Batch] = 1 x 1
[12/31 03:16:57][INFO] [UnityDataset] Found 1 sequences.
[12/31 03:16:57][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 03:16:57][INFO] [Train Dataset][All]: ConcatDataset size=1
[12/31 03:16:57][INFO]
[12/31 03:16:57][INFO] [UnityDataset] Found 1 sequences.
[12/31 03:16:57][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 03:16:57][INFO]
[12/31 03:17:04][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
[12/31 03:17:24][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_10/checkpoints'
[12/31 03:17:36][INFO] Start Fitting...
[12/31 03:17:38][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
[12/31 03:17:38][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 03:17:38][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 03:17:38][INFO] [FIT][Epoch 0] Data: unity Experiment: finetune_
[12/31 03:17:40][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return F.conv1d(input, weight, bias, self.stride,
[12/31 03:17:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
[12/31 03:17:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
[12/31 03:17:52][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 root_y0: gt=+0.9944 pred=+0.9686 delta(pred-gt)=-0.0258
[12/31 03:17:52][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 global_orient0_aa(gt)=[-0.01583315 -2.9540217 0.03045931] global_orient0_aa(pred)=[ 0.03382589 -2.8592563 -0.01758517]
[12/31 03:17:52][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 global_orient0_yxz_deg gt=(-169.26,+1.11,+0.72) pred=(-163.83,-0.50,-1.43) pred_vs_gt=(+5.47,+1.99,+1.81)
[12/31 03:17:52][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 yaw0_deg(pred_vs_gt)=-5.75
[12/31 03:18:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 03:18:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 03:18:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 03:18:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 03:18:36][INFO] [FIT][Epoch 0] finished! 00:58→03:55 | loss_epoch=23
[12/31 06:06:15][INFO] [Exp Name]: finetune_
[12/31 06:06:15][INFO] [GPU x Batch] = 1 x 1
[12/31 06:06:15][INFO] [UnityDataset] Found 3 sequences.
[12/31 06:06:15][INFO] [Train Dataset][9/9]: name=unity, size=3, genmo.datasets.unity_dataset.UnityDataset
[12/31 06:06:15][INFO] [Train Dataset][All]: ConcatDataset size=3
[12/31 06:06:15][INFO]
[12/31 06:06:15][INFO] [UnityDataset] Found 3 sequences.
[12/31 06:06:15][INFO] [Val Dataset][7/7]: name=unity_val, size=3, genmo.datasets.unity_dataset.UnityDataset
[12/31 06:06:15][INFO]
[12/31 06:06:21][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
[12/31 06:06:49][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_11/checkpoints'
[12/31 06:07:02][INFO] Start Fitting...
[12/31 06:07:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
[12/31 06:07:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 06:07:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 06:07:04][INFO] [FIT][Epoch 0] Data: unity Experiment: finetune_
[12/31 06:07:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return F.conv1d(input, weight, bias, self.stride,
[12/31 06:07:09][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
[12/31 06:07:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
[12/31 06:07:22][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9820 pred=+0.9698 delta(pred-gt)=-0.0123
[12/31 06:07:22][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03590106 -0.17807975 0.02012725] global_orient0_aa(pred)=[-0.08420898 -2.6493108 -0.07150012]
[12/31 06:07:22][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-10.19,+2.15,+0.96) pred=(-151.99,-3.76,+2.70) pred_vs_gt=(-141.82,-6.13,+0.67)
[12/31 06:07:22][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+144.65
[12/31 06:08:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:08:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:08:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:08:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:08:17][INFO] [FIT][Epoch 0] finished! 01:14→04:57 | loss_epoch=41.9
[12/31 06:13:34][INFO] [Exp Name]: finetune_
[12/31 06:13:34][INFO] [GPU x Batch] = 1 x 1
[12/31 06:13:34][INFO] [UnityDataset] Found 3 sequences.
[12/31 06:13:34][INFO] [Train Dataset][9/9]: name=unity, size=3, genmo.datasets.unity_dataset.UnityDataset
[12/31 06:13:34][INFO] [Train Dataset][All]: ConcatDataset size=3
[12/31 06:13:34][INFO]
[12/31 06:13:34][INFO] [UnityDataset] Found 3 sequences.
[12/31 06:13:34][INFO] [Val Dataset][7/7]: name=unity_val, size=3, genmo.datasets.unity_dataset.UnityDataset
[12/31 06:13:34][INFO]
[12/31 06:13:43][INFO] [Exp Name]: finetune_
[12/31 06:13:43][INFO] [GPU x Batch] = 1 x 1
[12/31 06:13:43][INFO] [UnityDataset] Found 1 sequences.
[12/31 06:13:43][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 06:13:43][INFO] [Train Dataset][All]: ConcatDataset size=1
[12/31 06:13:43][INFO]
[12/31 06:13:43][INFO] [UnityDataset] Found 1 sequences.
[12/31 06:13:43][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 06:13:43][INFO]
[12/31 06:13:48][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
[12/31 06:14:11][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_12/checkpoints'
[12/31 06:14:22][INFO] Start Fitting...
[12/31 06:14:26][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
[12/31 06:14:26][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 06:14:26][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 06:14:26][INFO] [FIT][Epoch 0] Data: unity Experiment: finetune_
[12/31 06:14:28][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return F.conv1d(input, weight, bias, self.stride,
[12/31 06:14:30][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
[12/31 06:14:30][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
[12/31 06:14:41][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9875 pred=+0.9726 delta(pred-gt)=-0.0149
[12/31 06:14:41][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.01689476 -0.20703594 0.01797612] global_orient0_aa(pred)=[-0.03202499 -2.848779 -0.0755955 ]
[12/31 06:14:41][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-11.85,+1.07,+0.92) pred=(-163.31,-3.16,+0.82) pred_vs_gt=(-151.42,-4.12,-0.97)
[12/31 06:14:41][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+151.99
[12/31 06:15:23][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:15:23][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:15:23][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:15:23][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:15:23][INFO] [FIT][Epoch 0] finished! 01:00→04:00 | loss_epoch=14.3
[12/31 06:19:20][INFO] [Exp Name]: finetune_
[12/31 06:19:20][INFO] [GPU x Batch] = 1 x 1
[12/31 06:19:20][INFO] [UnityDataset] Found 1 sequences.
[12/31 06:19:20][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 06:19:20][INFO] [Train Dataset][All]: ConcatDataset size=1
[12/31 06:19:20][INFO]
[12/31 06:19:20][INFO] [UnityDataset] Found 1 sequences.
[12/31 06:19:20][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
[12/31 06:19:20][INFO]
[12/31 06:19:26][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
[12/31 06:19:48][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_13/checkpoints'
[12/31 06:19:59][INFO] Start Fitting...
[12/31 06:20:01][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
[12/31 06:20:01][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 06:20:01][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
[12/31 06:20:01][INFO] [FIT][Epoch 0] Data: unity Experiment: finetune_
[12/31 06:20:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return F.conv1d(input, weight, bias, self.stride,
[12/31 06:20:06][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
[12/31 06:20:06][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
[12/31 06:20:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9875 pred=+0.9726 delta(pred-gt)=-0.0149
[12/31 06:20:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.02646996 2.9343371 -0.02487765] global_orient0_aa(pred)=[-0.03202499 -2.848779 -0.0755955 ]
[12/31 06:20:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(+168.15,+1.07,+0.92) pred=(-163.31,-3.16,+0.82) pred_vs_gt=(+28.58,+4.12,+0.97)
[12/31 06:20:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=-28.01
[12/31 06:20:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:20:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:20:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:20:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[12/31 06:20:59][INFO] [FIT][Epoch 0] finished! 00:59→03:56 | loss_epoch=14.2
train.sh
CHANGED
@@ -1 +1,9 @@
-
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Make sure local repo modules (incl. `third_party/*`) are importable.
+export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"
+# GVHMR uses absolute imports like `import hmr4d...` internally.
+export PYTHONPATH="$(pwd)/third_party/GVHMR${PYTHONPATH:+:$PYTHONPATH}"
+
+python scripts/train.py --config-name finetune_unity ckpt_path=./s050000.ckpt