zirobtc committed on
Commit 7ee73c7 · verified · 1 Parent(s): f1f9bbf

Upload folder using huggingface_hub
configs/callbacks/vis/vis_unity_val.yaml CHANGED
@@ -3,6 +3,9 @@ vis_unity_val:
   enabled: false
   every_n_epochs: 1
   num_batches: 1
+  # Which val batches to render: "first" or "random".
+  batch_select: "first"
+  batch_select_seed: 123
   num_frames: 30
   render_incam: true
   render_global: true
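Reviewer note: the selection rule this config drives can be checked standalone. A minimal sketch in plain numpy (the function name `select_batches` and the 10-batch loader are illustrative, not part of the repo); it mirrors the seeding scheme the callback uses below:

```python
import numpy as np

def select_batches(n_batches, k, mode="first", seed=123, epoch=0, dl_idx=0):
    # Mirror of the callback's rule: first k batches, or a seeded random k-subset.
    k = min(k, n_batches)
    if mode == "random":
        # Mixing seed, epoch, and dataloader index means each epoch renders
        # different batches while reruns stay reproducible.
        rng = np.random.default_rng(seed + epoch * 1000 + dl_idx)
        return set(rng.choice(np.arange(n_batches), size=k, replace=False).tolist())
    return set(range(k))

assert select_batches(10, 2, "random", epoch=3) == select_batches(10, 2, "random", epoch=3)
assert select_batches(10, 2, "first") == {0, 1}
```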
genmo/callbacks/vis/vis_unity_val.py CHANGED
@@ -31,6 +31,8 @@ class VisUnityVal(pl.Callback):
         num_frames: int = 30,
         render_incam: bool = True,
         render_global: bool = True,
+        batch_select: str = "first",
+        batch_select_seed: int = 123,
         use_gt_betas_for_pred: bool = True,
         global_root_relative: bool = False,
         postprocess_global: bool = True,
@@ -46,6 +48,8 @@ class VisUnityVal(pl.Callback):
         self.num_frames = num_frames
         self.render_incam = render_incam
         self.render_global = render_global
+        self.batch_select = str(batch_select or "first").strip().lower()
+        self.batch_select_seed = int(batch_select_seed)
         self.use_gt_betas_for_pred = use_gt_betas_for_pred
         self.global_root_relative = global_root_relative
         self.postprocess_global = postprocess_global
@@ -58,6 +62,41 @@ class VisUnityVal(pl.Callback):
         self._smplx2smpl = None
         self._faces = None
         self._J_regressor = None
+        self._selected_batch_idxs_by_loader = {}
+        self._seen_batch_count_by_loader = {}
+
+    def on_validation_epoch_start(self, trainer, pl_module):
+        self._selected_batch_idxs_by_loader = {}
+        self._seen_batch_count_by_loader = {}
+        if not self.enabled:
+            return
+        if trainer.global_rank != 0:
+            return
+        if self.every_n_epochs is not None and (trainer.current_epoch % self.every_n_epochs) != 0:
+            return
+
+        # Try to deterministically select which batches to render for each val dataloader.
+        try:
+            num_val_batches = getattr(trainer, "num_val_batches", None)
+            if num_val_batches is None:
+                return
+            if isinstance(num_val_batches, int):
+                num_val_batches = [num_val_batches]
+            for dl_idx, n in enumerate(list(num_val_batches)):
+                n = int(n)
+                if n <= 0:
+                    self._selected_batch_idxs_by_loader[dl_idx] = set()
+                    continue
+                k = min(int(self.num_batches), n)
+                if self.batch_select == "random":
+                    rng = np.random.default_rng(int(self.batch_select_seed) + int(trainer.current_epoch) * 1000 + int(dl_idx))
+                    chosen = rng.choice(np.arange(n, dtype=np.int64), size=k, replace=False)
+                    self._selected_batch_idxs_by_loader[dl_idx] = set(int(x) for x in chosen.tolist())
+                else:
+                    self._selected_batch_idxs_by_loader[dl_idx] = set(range(k))
+        except Exception:
+            # Fallback: keep legacy behavior (first N batches).
+            self._selected_batch_idxs_by_loader = {}

     def _lazy_init_models(self, device: torch.device):
         if self._smplx is None:
@@ -118,8 +157,19 @@ class VisUnityVal(pl.Callback):
             return
         if self.every_n_epochs is not None and (trainer.current_epoch % self.every_n_epochs) != 0:
             return
-        if batch_idx >= self.num_batches:
-            return
+        dl_i = int(dataloader_idx)
+        local_idx = int(self._seen_batch_count_by_loader.get(dl_i, 0))
+        self._seen_batch_count_by_loader[dl_i] = local_idx + 1
+
+        selected = self._selected_batch_idxs_by_loader.get(dl_i, None)
+        if selected is None:
+            # Fallback: legacy behavior.
+            if batch_idx >= self.num_batches:
+                return
+        else:
+            # Use loader-local index (CombinedLoader may provide a global `batch_idx`).
+            if local_idx not in selected:
+                return

         if outputs is None or "pred_smpl_params_incam" not in outputs:
             Log.warning("[VisUnityVal] Missing `pred_smpl_params_incam` in outputs; skipping.")
@@ -128,33 +178,31 @@ class VisUnityVal(pl.Callback):
         meta_render = None
         if "meta_render" in batch and isinstance(batch["meta_render"], list) and batch["meta_render"]:
             meta_render = batch["meta_render"][0]
-        img_paths = meta_render.get("img_paths") if isinstance(meta_render, dict) else None
-        if not img_paths:
-            Log.warning("[VisUnityVal] Missing `meta_render.img_paths`; skipping incam rendering.")
-            return
+        # NOTE: Do not depend on image/video I/O for validation visualization; render on black.

         vid = batch["meta"][0].get("vid", f"b{batch_idx:03d}")
         vid = self._safe_vid(str(vid))

         # Pick frames to render (within the already-sliced/padded window).
-        L = int(batch["K_fullimg"].shape[1]) if "K_fullimg" in batch else len(img_paths)
+        L = int(batch["K_fullimg"].shape[1]) if "K_fullimg" in batch else 0
         if L <= 0:
             return
         num_frames = min(self.num_frames, L)
         frame_idxs = np.linspace(0, L - 1, num_frames).round().astype(int)

-        # Read one frame to get size.
-        first = cv2.imread(img_paths[int(frame_idxs[0])])
-        if first is None:
-            Log.warning(f"[VisUnityVal] Failed to read image: {img_paths[int(frame_idxs[0])]}")
-            return
-        height, width = first.shape[:2]
-
         device = pl_module.device
         self._lazy_init_models(device)

-        # Renderer uses a single K; in practice K is constant for Unity sequences.
+        # Render on black; infer output size from principal point (usually near W/2, H/2).
         K = batch["K_fullimg"][0, 0].to(device)
+        try:
+            cx = float(K[0, 2].detach().cpu().item())
+            cy = float(K[1, 2].detach().cpu().item())
+            width = max(64, int(round(cx * 2.0)))
+            height = max(64, int(round(cy * 2.0)))
+        except Exception:
+            width, height = 1280, 720
+
         renderer_incam = Renderer(width, height, device=device, faces=self._faces, K=K)
         # Make the overlay look "flat colored" (no Phong shading).
         try:
@@ -190,10 +238,7 @@ class VisUnityVal(pl.Callback):
                 renderer_incam.set_intrinsic(K_fi)
             except Exception:
                 pass
-            frame_bgr = cv2.imread(img_paths[int(fi)])
-            if frame_bgr is None:
-                continue
-            frame = frame_bgr[..., ::-1]  # RGB
+            frame = np.zeros((height, width, 3), dtype=np.uint8)  # RGB black
             img = renderer_incam.render_mesh(gt_verts_incam[i], frame, colors=self.gt_color)
             img = renderer_incam.render_mesh(pred_verts_incam[i], img, colors=self.pred_color)
             writer.write_frame(img.astype(np.uint8))
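The width/height fallback above assumes a roughly centered principal point, so doubling (cx, cy) recovers the frame size. A quick self-contained check (the intrinsics here are made up; real values come from `batch["K_fullimg"]`):

```python
import torch

K = torch.tensor([[1000.0, 0.0, 640.0],
                  [0.0, 1000.0, 360.0],
                  [0.0, 0.0, 1.0]])  # fx, fy, cx=640, cy=360

# For a centered principal point, cx ~ W/2 and cy ~ H/2, so doubling recovers the size.
width = max(64, int(round(float(K[0, 2]) * 2.0)))
height = max(64, int(round(float(K[1, 2]) * 2.0)))
assert (width, height) == (1280, 720)
```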
genmo/utils/vis/renderer.py CHANGED
@@ -291,8 +291,13 @@ class Renderer:
             verts_features = colors.to(device=vertices.device, dtype=vertices.dtype)
             colors = [0.8, 0.8, 0.8]
         else:
-            if colors[0] > 1:
-                colors = [c / 255.0 for c in colors]
+            # Accept either [0..1] floats or [0..255] uint8-like colors.
+            # Don't key off `colors[0]` because valid RGB like green [0,255,0] would fail.
+            try:
+                if max(colors) > 1:
+                    colors = [c / 255.0 for c in colors]
+            except Exception:
+                pass
             verts_features = (
                 torch.tensor(colors)
                 .reshape(1, 1, 3)
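To see the failure mode the new comment describes: keying off the first channel misclassifies any 0-255 color whose red component is 0 or 1, while keying off the max does not. A standalone sketch (the helper names are illustrative):

```python
def normalize_old(colors):
    # Old check: only looks at the red channel.
    return [c / 255.0 for c in colors] if colors[0] > 1 else list(colors)

def normalize_new(colors):
    # New check: any channel above 1 implies a 0-255 color.
    return [c / 255.0 for c in colors] if max(colors) > 1 else list(colors)

green = [0, 255, 0]
assert normalize_old(green) == [0, 255, 0]      # left unscaled: renders wrong
assert normalize_new(green) == [0.0, 1.0, 0.0]  # correctly rescaled
```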
scripts/train.py CHANGED
@@ -1,5 +1,21 @@
-import builtins
 import os
+import sys
+
+# Ensure repo root is importable when running as `python scripts/train.py`.
+# Without this, `genmo.*` may resolve from site-packages while `third_party.*`
+# (a namespace package in this repo) fails to import, which Hydra reports as
+# "Error locating target ...".
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+if _REPO_ROOT not in sys.path:
+    sys.path.insert(0, _REPO_ROOT)
+
+# GVHMR uses absolute imports like `import hmr4d...` internally, so its repo root
+# must also be importable.
+_GVHMR_ROOT = os.path.join(_REPO_ROOT, "third_party", "GVHMR")
+if os.path.isdir(_GVHMR_ROOT) and _GVHMR_ROOT not in sys.path:
+    sys.path.insert(0, _GVHMR_ROOT)
+
+import builtins
 from datetime import datetime

 import hydra
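The bootstrap added here is a general pattern: prepend the repo root (and any vendored repo that uses absolute imports) to `sys.path` before the first package import. A minimal sketch of the same idea factored into a helper (`ensure_importable` is illustrative, not a function in this repo):

```python
import os
import sys

def ensure_importable(path: str) -> None:
    # Prepend `path` once so `python scripts/train.py` resolves packages that
    # live relative to the repo root instead of site-packages.
    path = os.path.abspath(path)
    if os.path.isdir(path) and path not in sys.path:
        sys.path.insert(0, path)

repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
ensure_importable(repo_root)
ensure_importable(os.path.join(repo_root, "third_party", "GVHMR"))
```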
third_party/GVHMR/hmr4d/utils/vis/renderer.py CHANGED
@@ -223,8 +223,13 @@ class Renderer:
             verts_features = colors.to(device=vertices.device, dtype=vertices.dtype)
             colors = [0.8, 0.8, 0.8]
         else:
-            if colors[0] > 1:
-                colors = [c / 255.0 for c in colors]
+            # Accept either [0..1] floats or [0..255] uint8-like colors.
+            # Don't key off `colors[0]` because valid RGB like green [0,255,0] would fail.
+            try:
+                if max(colors) > 1:
+                    colors = [c / 255.0 for c in colors]
+            except Exception:
+                pass
             verts_features = torch.tensor(colors).reshape(1, 1, 3).to(device=vertices.device, dtype=vertices.dtype)
             verts_features = verts_features.repeat(1, vertices.shape[1], 1)
             textures = TexturesVertex(verts_features=verts_features)
third_party/GVHMR/process_data.sh CHANGED
@@ -1 +1 @@
-python tools/demo/process_dataset.py --input /mnt/c/Temp/SyntheticDataset --output ./processed_data --vitpose --workers 3 --debug
+python tools/demo/process_dataset.py --input /mnt/c/Temp/SyntheticDataset --output ./processed_dataset --genmo --debug
third_party/GVHMR/tools/demo/process_dataset.py CHANGED
@@ -1,879 +1,1760 @@
 import sys
 import os
 import json
 import argparse
 import numpy as np
 import zlib
 from glob import glob
 from tqdm import tqdm
 import cv2
 import torch
 from scipy.spatial.transform import Rotation as R
 import time
 import shutil
 from pathlib import Path

 # --- SETUP PATHS FOR IMPORTS ---
-REPO_ROOT = Path(__file__).resolve().parents[2]  # Adjust as needed based on where this script lives
+REPO_ROOT = Path(__file__).resolve().parents[2]
 if str(REPO_ROOT) not in sys.path:
     sys.path.insert(0, str(REPO_ROOT))
-
-# Try to import Extractor. If this fails, the script will error out early.
-try:
-    gvhmr_root = REPO_ROOT / "third_party" / "GVHMR"
-    if gvhmr_root.exists() and str(gvhmr_root) not in sys.path:
-        sys.path.insert(0, str(gvhmr_root))
-    from hmr4d.utils.preproc.vitfeat_extractor import Extractor
-    from hmr4d.utils.pylogger import Log
-except ImportError:
-    # Fallback/Mock for standalone testing if repo structure differs,
-    # but based on your prompt, this path should exist.
-    print("WARNING: Could not import Extractor. Feature extraction will fail.")
-    Extractor = None
-
-# Force single thread for libraries
+
+from hmr4d.utils.preproc.vitfeat_extractor import Extractor
+from hmr4d.utils.pylogger import Log
+
+# Force single thread
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 cv2.setNumThreads(0)
 torch.set_num_threads(1)

 FPS = 30.0
-DEBUG_NUM_FRAMES = 60
+DEBUG_NUM_FRAMES = 5
 IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
 IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

-# --- HELPER FUNCTIONS ---
+# --- HELPER FUNCTIONS (No Changes) ---

 def _process_image_memory(img_bgr, bbox_xywh, img_size=256):
-    """
-    Adapted from process_single_image to work on in-memory numpy arrays.
-    """
-    if img_bgr is None:
-        return np.zeros((3, img_size, img_size), dtype=np.float32)
+    if img_bgr is None: return np.zeros((3, img_size, img_size), dtype=np.float32)
     x, y, w, h = bbox_xywh
     cx, cy = x + w/2, y + h/2
-    # Genmo/HMR usually uses max(w,h) * 1.2 or similar, ensuring we match the prompt's scale logic if implicit
-    # Assuming the bbox provided in json is already the correct crop region or close to it.
-    # The user provided snippet used 'scale'. We approximate scale from bbox if not provided.
-    # Standard HMR extraction uses a specific scale factor.
-    # Here we assume the input bbox is the "tight" bbox and we need to square it.
     scale = max(w, h) * 1.2
     H, W = img_bgr.shape[:2]
     max_side = float(max(H, W, 1))
-    if scale <= 1.0 or scale > max_side * 20.0:
-        # Fallback for bad scales
-        scale = max_side * 0.5
+    if scale <= 1.0 or scale > max_side * 20.0: scale = max_side * 0.5
     half = scale / 2.0
     x0, y0 = int(cx - half), int(cy - half)
     x1, y1 = int(cx + half), int(cy + half)
     pad_l, pad_t = max(0, -x0), max(0, -y0)
     pad_r, pad_b = max(0, x1 - W), max(0, y1 - H)
-    if max(pad_l, pad_t, pad_r, pad_b) > int(max_side * 4.0):
-        # Sanity check fail, return black
-        return np.zeros((3, img_size, img_size), dtype=np.float32)
+    if max(pad_l, pad_t, pad_r, pad_b) > int(max_side * 4.0): return np.zeros((3, img_size, img_size), dtype=np.float32)
     if pad_l or pad_t or pad_r or pad_b:
         img_bgr = cv2.copyMakeBorder(img_bgr, pad_t, pad_b, pad_l, pad_r, cv2.BORDER_CONSTANT, value=(0,0,0))
         x0 += pad_l; y0 += pad_t; x1 += pad_l; y1 += pad_t
     crop = img_bgr[y0:y1, x0:x1]
-
-    if crop.size == 0:
-        return np.zeros((3, img_size, img_size), dtype=np.float32)
-
+    if crop.size == 0: return np.zeros((3, img_size, img_size), dtype=np.float32)
     if crop.shape[0] != img_size or crop.shape[1] != img_size:
         crop = cv2.resize(crop, (img_size, img_size), interpolation=cv2.INTER_LINEAR)
-    # Normalize
-    crop = crop[:, :, ::-1].astype(np.float32) / 255.0  # BGR to RGB
+    crop = crop[:, :, ::-1].astype(np.float32) / 255.0
     crop = (crop - IMAGENET_MEAN) / IMAGENET_STD
-    return crop.transpose(2, 0, 1)  # HWC -> CHW
+    return crop.transpose(2, 0, 1)

 def _alpha_blend_bgra_onto_bgr(dst_bgr, src_bgra, x, y):
     if dst_bgr is None or src_bgra is None: return dst_bgr
     H, W = dst_bgr.shape[:2]
     h, w = src_bgra.shape[:2]
     if w <= 0 or h <= 0: return dst_bgr
     x0, y0 = max(int(x), 0), max(int(y), 0)
     x1, y1 = min(int(x + w), W), min(int(y + h), H)
     if x1 <= x0 or y1 <= y0: return dst_bgr
     roi = dst_bgr[y0:y1, x0:x1]
     src_crop = src_bgra[(y0 - int(y)):(y0 - int(y)) + (y1 - y0), (x0 - int(x)):(x0 - int(x)) + (x1 - x0)]
     if src_crop.shape[2] == 3:
         roi[:] = src_crop
         return dst_bgr

     alpha = src_crop[:, :, 3].astype(np.uint16)
     inv_alpha = 255 - alpha
     b_src, g_src, r_src = src_crop[:, :, 0], src_crop[:, :, 1], src_crop[:, :, 2]
     b_dst, g_dst, r_dst = roi[:, :, 0], roi[:, :, 1], roi[:, :, 2]

     roi[:, :, 0] = ((b_src * alpha + b_dst * inv_alpha) >> 8).astype(np.uint8)
     roi[:, :, 1] = ((g_src * alpha + g_dst * inv_alpha) >> 8).astype(np.uint8)
     roi[:, :, 2] = ((r_src * alpha + r_dst * inv_alpha) >> 8).astype(np.uint8)
     return dst_bgr

 def _find_ui_dir():
     cand = os.path.join(os.getcwd(), "UI")
     if os.path.isdir(cand): return cand
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    cand2 = os.path.abspath(os.path.join(script_dir, "..", "..", "UI"))
-    if os.path.isdir(cand2): return cand2
-    return None
+    return None  # Simplified for brevity

 def _find_font_path(ui_dir, filename="Inter_18pt-Bold.ttf"):
     if not ui_dir: return None
     p = os.path.join(ui_dir, filename)
     return p if os.path.isfile(p) else None

 def _load_ui_images(ui_dir):
     if not ui_dir or (not os.path.isdir(ui_dir)): return []
     imgs = []
     for name in sorted(os.listdir(ui_dir)):
         p = os.path.join(ui_dir, name)
         if not os.path.isfile(p): continue
         if name.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
             im = cv2.imread(p, cv2.IMREAD_UNCHANGED)
             if im is not None:
                 if im.ndim == 2: im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
                 imgs.append(im)
     return imgs

 class SimpleUIOverlay:
     def __init__(self, width, height, seed=0, ui_dir=None, max_images=4, show_prob=0.6, min_hold_frames=20, max_hold_frames=120):
         self.W, self.H = int(width), int(height)
         self.rng = np.random.default_rng(int(seed))
         self.max_images = max(0, int(max_images))
         self.show_prob = float(show_prob)
         self.min_hold_frames, self.max_hold_frames = max(1, int(min_hold_frames)), max(1, int(max_hold_frames))
         self.ui_dir = ui_dir if ui_dir else _find_ui_dir()
         self.assets = _load_ui_images(self.ui_dir)
         self._ttl, self._active = 0, []

     def _pick_new_state(self):
         self._ttl = int(self.rng.integers(self.min_hold_frames, self.max_hold_frames + 1))
         self._active = []
         if (not self.assets) or (self.max_images <= 0): return
         if float(self.rng.random()) > self.show_prob: return
         k = min(int(self.rng.integers(1, self.max_images + 1)), len(self.assets))
         idxs = self.rng.choice(len(self.assets), size=k, replace=False)
         for idx in idxs:
             im = self.assets[int(idx)]
             h, w = im.shape[:2]
             if w > 0 and h > 0:
                 x = int(self.rng.integers(-w // 4, max(1, self.W - (3 * w // 4))))
                 y = int(self.rng.integers(-h // 4, max(1, self.H - (3 * h // 4))))
                 self._active.append((im, x, y))

     def draw(self, img_bgr):
         if img_bgr is None: return img_bgr
         if self._ttl <= 0: self._pick_new_state()
         self._ttl -= 1
-        for im, x, y in self._active:
-            _alpha_blend_bgra_onto_bgr(img_bgr, im, x, y)
         return img_bgr

 class SimpleChatOverlay:
     def __init__(self, width, height, seed=0, num_lines=7, region_w=420, region_h=180, margin=18, every_n_frames=15, corner=None, font_path=None):
         from collections import deque
         self.W, self.H = int(width), int(height)
         self.rng = np.random.default_rng(int(seed))
         self.num_lines, self.margin, self.every_n_frames = int(num_lines), int(margin), max(1, int(every_n_frames))
         self.region_w, self.region_h = int(region_w), int(region_h)
         self.font_path = font_path
         self._pil_fonts = {}
         self.corner = str(corner) if corner else str(self.rng.choice(["tl", "tr", "bl", "br"]))
         self.messages = deque(maxlen=self.num_lines)
         for _ in range(self.num_lines): self.messages.append(self._random_message())
         self._cached_overlay, self._dirty = None, True

     def _random_message(self):
         user = str(self.rng.choice(["nightbot", "viewer", "catjam", "shadow", "speedrunner", "chattycathy", "kappaking"]))
         if self.rng.random() < 0.5: user += str(self.rng.integers(10, 999))
         text = str(self.rng.choice(["pog", "lol", "gg", "nice", "W", "L", "no shot", "crazy", "clip it", "cooking", "unlucky"]))
         color = tuple(int(x) for x in self.rng.choice([(255, 120, 0), (0, 180, 255), (255, 0, 180), (0, 255, 120)]))
         return {"user": user, "text": text, "color": color}

     def _get_pil_font(self, size_px):
         if not self.font_path: return None
         if size_px in self._pil_fonts: return self._pil_fonts[size_px]
         try:
             from PIL import ImageFont
-            font = ImageFont.truetype(self.font_path, size=max(1, size_px))
-            self._pil_fonts[size_px] = font
-            return font
         except: return None

     def maybe_append(self, frame_idx):
         if int(frame_idx) % self.every_n_frames == 0:
             self.messages.append(self._random_message())
             self._dirty = True

     def _render_cache(self):
         rw = min(self.region_w, max(40, self.W - 2 * self.margin))
         rh = min(self.region_h, max(40, self.H - 2 * self.margin))
         pil_font = self._get_pil_font(int(round(np.clip(20.0 * (self.H / 720.0), 14.0, 30.0))))
         if pil_font is None:
-            self._cached_overlay = None
-            return
         try:
             from PIL import Image, ImageDraw
             pil = Image.new("RGBA", (rw, rh), (0, 0, 0, 0))
             draw = ImageDraw.Draw(pil)
-            line_h = _clamp_int(int(round(float(getattr(pil_font, "size", 18)) * 1.25)), 14, 34)
             lines = list(self.messages)[-min(self.num_lines, max(1, rh // line_h)):]
             local_y = rh - line_h if self.corner in ("bl", "br") else 0
             for msg in lines:
                 user = f"{msg['user']}: "
-                draw.text((0, local_y), user, font=pil_font, fill=tuple(msg['color'][::-1]))
                 tw = draw.textlength(user, font=pil_font)
                 draw.text((tw, local_y), msg['text'], font=pil_font, fill=(240, 240, 240))
                 local_y += (-line_h if self.corner in ("bl", "br") else line_h)
             self._cached_overlay = cv2.cvtColor(np.asarray(pil), cv2.COLOR_RGBA2BGRA)
         except: self._cached_overlay = None

     def draw(self, img_bgr):
         if img_bgr is None: return img_bgr
-        if self._dirty:
-            self._render_cache()
-            self._dirty = False
         if self._cached_overlay is not None:
             rw = min(self.region_w, max(40, self.W - 2 * self.margin))
             rh = min(self.region_h, max(40, self.H - 2 * self.margin))
             if self.corner == "tl": x, y = self.margin, self.margin
             elif self.corner == "tr": x, y = self.W - self.margin - rw, self.margin
             elif self.corner == "bl": x, y = self.margin, self.H - self.margin - rh
             else: x, y = self.W - self.margin - rw, self.H - self.margin - rh
             _alpha_blend_bgra_onto_bgr(img_bgr, self._cached_overlay, x, y)
         return img_bgr

-def k4_to_K3(k4):
-    return np.array([[k4[0], 0, k4[2]], [0, k4[1], k4[3]], [0, 0, 1]], dtype=np.float32)

 def bbox_xywh_to_bbx_xys(bbox_xywh, base_enlarge=1.0):
     x, y, w, h = [float(v) for v in bbox_xywh]
     return np.array([x + 0.5 * w, y + 0.5 * h, max(w, h) * float(base_enlarge)], dtype=np.float32)

 def clamp_bbox_xywh_to_image(bbox_xywh, W, H, min_size=1.0):
     x, y, w, h = [float(v) for v in bbox_xywh]
     W, H = float(W), float(H)
     if W <= 0 or H <= 0: return [0.0, 0.0, 0.0, 0.0]
     x2, y2 = x + w, y + h
     x1c = float(np.clip(x, 0.0, max(0.0, W - 1.0)))
     y1c = float(np.clip(y, 0.0, max(0.0, H - 1.0)))
     x2c = float(np.clip(x2, 0.0, W))
     y2c = float(np.clip(y2, 0.0, H))
     if x2c <= x1c: x2c = min(W, x1c + float(min_size))
     if y2c <= y1c: y2c = min(H, y1c + float(min_size))
     wc = max(0.0, x2c - x1c)
     hc = max(0.0, y2c - y1c)
     return [x1c, y1c, wc, hc]

 def draw_bbox_xywh_and_center(img_bgr, bbox_xywh, color=(255, 255, 0)):
     x, y, w, h = [float(v) for v in bbox_xywh]
     cv2.rectangle(img_bgr, (int(x), int(y)), (int(x+w), int(y+h)), color, 2)
     cv2.circle(img_bgr, (int(x+w/2), int(y+h/2)), 4, (0, 0, 255), -1)

 def vis_label_and_color(v: int):
     if v == 2: return "VIS", (0, 255, 0)
     if v == 1: return "OCC", (0, 165, 255)
     return "OFF", (160, 160, 160)

 def draw_vis_text_and_points(img_bgr, kpts2d_xy, vis17):
     for k in range(17):
         v = int(vis17[k])
         label, color = vis_label_and_color(v)
         x, y = int(round(kpts2d_xy[k, 0])), int(round(kpts2d_xy[k, 1]))
         if v > 0: cv2.circle(img_bgr, (x, y), 4, color, -1)
         cv2.putText(img_bgr, f"{k}:{label}", (x + 6, y - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1, cv2.LINE_AA)

 def build_T_wc(pos_world, quat_world_xyzw):
     T = np.eye(4, dtype=np.float64)
     T[:3, :3] = R.from_quat(np.asarray(quat_world_xyzw, dtype=np.float64)).as_matrix()
     T[:3, 3] = np.asarray(pos_world, dtype=np.float64)
     return T

 def compute_velocity(mats, fps=30.0):
     N = len(mats)
     if N < 2: return np.zeros((N, 3), dtype=np.float32), np.zeros((N, 3), dtype=np.float32)
     R_curr = mats[:, :3, :3]
     R_diff = np.matmul(R_curr[1:], np.transpose(R_curr[:-1], (0, 2, 1)))
     rv = R.from_matrix(R_diff).as_rotvec()
     angvel = np.zeros((N, 3), dtype=np.float32)
     angvel[1:] = rv
     t_curr = mats[:, :3, 3]
     tvel = np.zeros((N, 3), dtype=np.float32)
     tvel[1:] = t_curr[1:] - t_curr[:-1]
     return angvel.astype(np.float32), tvel.astype(np.float32)

 def _compute_vitpose_selected_indices(num_frames, fps, bucket_seconds, frames_per_bucket, sampling="uniform", seed=123):
     if num_frames <= 0: return []
     rng = np.random.default_rng(int(seed))
     selected = []
     bucket_len = max(1, int(round(float(bucket_seconds) * float(fps))))
     b_start = 0
     while b_start < num_frames:
         b_end = min(num_frames, b_start + bucket_len)
         k = min(int(frames_per_bucket), b_end - b_start)
         if k > 0:
-            if sampling == "random":
-                idxs = np.sort(rng.choice(np.arange(b_start, b_end), size=k, replace=False)).tolist()
-            elif sampling == "linspace":
-                idxs = sorted(list(set(np.linspace(b_start, b_end - 1, k, dtype=int).tolist())))
-            else:  # uniform
             if k == 1: idxs = [b_start + (b_end - b_start) // 2]
-            else:
-                step = (b_end - b_start) // k
-                idxs = [min(b_start + i * step, b_end - 1) for i in range(k)]
             selected.extend(idxs)
         b_start = b_end
     return sorted(list(set(selected)))

-# Initialize model variable
 _SMPLX_MODEL = None
 _SMPLX_DEVICE = None

 def _get_smplx_model(device):
     global _SMPLX_MODEL, _SMPLX_DEVICE
-    if _SMPLX_MODEL is not None and _SMPLX_DEVICE == device:
-        return _SMPLX_MODEL
     from hmr4d.utils.smplx_utils import make_smplx
     _SMPLX_MODEL = make_smplx("supermotion").to(device).eval()
     _SMPLX_DEVICE = device
     return _SMPLX_MODEL

-# SMPL Renderer
 class SmplIncamRenderer:
     def __init__(self, width, height, K4, device="cuda", smplx2smpl_path="hmr4d/utils/body_model/smplx2smpl_sparse.pt"):
         from hmr4d.utils.smplx_utils import make_smplx
         from hmr4d.utils.vis.renderer import Renderer
         self.torch = torch
         self.device = device
         self.smplx = make_smplx("supermotion").to(device).eval()
-        self.smplx2smpl = None
-        self.faces = None
         try:
             self.smplx2smpl = torch.load(smplx2smpl_path).to(device)
             self.faces = make_smplx("smpl").faces
         except: self.faces = self.smplx.faces
         self.K_torch = torch.from_numpy(k4_to_K3(K4)).to(device)
         self.renderer = Renderer(width, height, device=device, faces=self.faces, K=self.K_torch)

     @torch.no_grad()
     def render(self, img_rgb_uint8, global_orient_aa, body_pose_aa, betas_10, transl_xyz, fl, pp):
         K3_torch = torch.from_numpy(np.array([[fl[0], 0, pp[0]], [0, fl[1], pp[1]], [0, 0, 1]], dtype=np.float32)).to(self.device)
         self.renderer.set_intrinsic(K3_torch)
-        params = {
-            "global_orient": torch.from_numpy(global_orient_aa[None]).float().to(self.device),
-            "body_pose": torch.from_numpy(body_pose_aa[None]).float().to(self.device),
-            "betas": torch.from_numpy(betas_10[None]).float().to(self.device),
-            "transl": torch.from_numpy(transl_xyz[None]).float().to(self.device),
-        }
-        out = self.smplx(**params)
-        verts = out.vertices[0]
         if self.smplx2smpl is not None and verts.dim() == 2: verts = torch.matmul(self.smplx2smpl, verts)
         img_out = self.renderer.render_mesh(verts, img_rgb_uint8, [0.8, 0.8, 0.8])
         return img_out
-
     @torch.no_grad()
     def get_verts(self, global_orient_aa, body_pose_aa, betas_10, transl_xyz):
-        params = {
-            "global_orient": torch.from_numpy(global_orient_aa[None]).float().to(self.device),
-            "body_pose": torch.from_numpy(body_pose_aa[None]).float().to(self.device),
-            "betas": torch.from_numpy(betas_10[None]).float().to(self.device),
-            "transl": torch.from_numpy(transl_xyz[None]).float().to(self.device),
-        }
-        out = self.smplx(**params)
-        verts = out.vertices[0]
-        if self.smplx2smpl is not None and verts.dim() == 2:
-            verts = torch.matmul(self.smplx2smpl, verts)
         return verts

 def _as_betas10(betas_any) -> np.ndarray:
     betas = np.asarray(betas_any, dtype=np.float32).reshape(-1)
-    betas10 = np.zeros(10, dtype=np.float32)
-    n = min(10, betas.size)
     if n > 0: betas10[:n] = betas[:n]
     return betas10

 def load_betas10_from_npz(npz_path, key="betas", index=None):
-    with np.load(npz_path, allow_pickle=True) as data:
-        arr = data[key]
     if arr.ndim == 0: arr = np.asarray(arr).reshape(1)
     if arr.ndim == 1: betas = arr
-    elif arr.ndim == 2:
-        row_idx = 0 if index is None else int(index)
-        betas = arr[row_idx]
     else: raise ValueError(f"Bad betas shape: {arr.shape}")
     return _as_betas10(betas)

-def _default_shape_npz_path() -> str:
-    return os.path.join(os.path.dirname(__file__), "shape.npz")

 def parse_smpl_inputs_from_row(row, override_betas10=None, keep_unity_scale=False, transl_source="pelvis", transl_y_offset_m=0.0):
     C = np.diag([1.0, -1.0, 1.0]).astype(np.float64)
     cam_rot_w_quat = np.array(row["cam_rot_world"], dtype=np.float64)
     R_cam_w = R.from_quat(cam_rot_w_quat).as_matrix()
     pel_rot_w_quat = np.array(row["pelvis_rot_world"], dtype=np.float64)
     R_pel_w = R.from_quat(pel_rot_w_quat).as_matrix()
     R_rel_unity = R_cam_w.T @ R_pel_w
     R_cv = C @ R_rel_unity @ C
-    R_final = R_cv @ R.from_euler('z', 180, degrees=True).as_matrix()
     global_orient_aa = R.from_matrix(R_final).as_rotvec().astype(np.float32)

     smpl_scale = float(row.get("smpl_root_world_scale", 1.0))
     pelvis_cam_unity = np.asarray(row["smpl_incam_transl"], dtype=np.float64).reshape(3)
     root_cam_unity = np.asarray(row.get("smpl_root_incam_transl", [0.0, 0.0, 0.0]), dtype=np.float64).reshape(3)
     pelvis_cam_unity = pelvis_cam_unity + np.array([0.0, float(transl_y_offset_m), 0.0], dtype=np.float64)

     if str(transl_source).strip().lower() == "root": target_cam_unity = root_cam_unity
     else:
         if bool(keep_unity_scale): target_cam_unity = pelvis_cam_unity
         else:
             if abs(smpl_scale) > 1e-8: target_cam_unity = root_cam_unity + (pelvis_cam_unity - root_cam_unity) / smpl_scale
             else: target_cam_unity = pelvis_cam_unity
     target_cam_cv = (C @ target_cam_unity).astype(np.float64)

     pose = np.asarray(row["smplx_pose"], dtype=np.float32)
     body_pose = pose[3:66].astype(np.float32)
     betas10 = _as_betas10(override_betas10)
     return {
         "global_orient": global_orient_aa, "body_pose": body_pose, "betas": betas10,
         "target_cam_cv": target_cam_cv, "cam_rot_w_quat": cam_rot_w_quat,
         "cam_pos_world": np.asarray(row["cam_pos_world"], dtype=np.float64).reshape(3),
         "pelvis_pos_world": np.asarray(row["pelvis_pos_world"], dtype=np.float64).reshape(3),
         "smpl_scale": smpl_scale, "root_cam_unity": root_cam_unity
     }

 def batch_smpl_forward(betas, global_orient, body_pose, device):
     model = _get_smplx_model(device)
     N = len(betas)
-    chunk_size = 4096
-    pelvis_list = []
     with torch.no_grad():
         for i in range(0, N, chunk_size):
             b_betas = torch.from_numpy(betas[i:i+chunk_size]).float().to(device)
             b_go = torch.from_numpy(global_orient[i:i+chunk_size]).float().to(device)
             b_bp = torch.from_numpy(body_pose[i:i+chunk_size]).float().to(device)
             b_tr = torch.zeros((len(b_betas), 3), dtype=torch.float32, device=device)
             out = model(betas=b_betas, global_orient=b_go, body_pose=b_bp, transl=b_tr)
             pelvis_list.append(out.joints[:, 0, :].detach().cpu().numpy())
     return np.concatenate(pelvis_list, axis=0)

 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--input", required=True)
     parser.add_argument("--output", required=True)
     parser.add_argument("--debug", action="store_true")
     parser.add_argument("--vitpose", action="store_true")
-    parser.add_argument("--genmo", action="store_true")
     parser.add_argument("--dpvo", action="store_true")
     parser.add_argument("--smplx", action="store_true")
     parser.add_argument("--debug_no_coco", action="store_true")
     parser.add_argument("--shape_npz", default=_default_shape_npz_path())
     parser.add_argument("--vitpose_use_all_frames", action="store_true")
     parser.add_argument("--vitpose_bucket_seconds", type=float, default=12.0)
     parser.add_argument("--vitpose_frames_per_bucket", type=int, default=36)
     parser.add_argument("--vitpose_sampling", type=str, default="random")
     parser.add_argument("--vitpose_seed", type=int, default=123)
     parser.add_argument("--ui_dir", type=str, default=None)
     parser.add_argument("--ui_show_prob", type=float, default=0.25)
     parser.add_argument("--ui_max_images", type=int, default=3)
     parser.add_argument("--ui_hold_min_s", type=float, default=0.7)
     parser.add_argument("--ui_hold_max_s", type=float, default=5.0)
     parser.add_argument("--ui_seed", type=int, default=None)
     parser.add_argument("--keep_unity_scale", action="store_true")
     parser.add_argument("--transl_source", type=str, default="pelvis")
     parser.add_argument("--transl_y_offset_m", type=float, default=-0.020)
     parser.add_argument("--world_y_offset_m", type=float, default=1.3415)
     parser.add_argument("--vit_batch_size", type=int, default=512, help="Batch size for in-memory ViT extraction")
     args = parser.parse_args()

     if not (args.vitpose or args.genmo or args.dpvo or args.smplx):
         args.vitpose = args.genmo = args.dpvo = args.smplx = True

     device = "cuda" if torch.cuda.is_available() else "cpu"
     print(f"Running STREAMING processing on {device.upper()}...")
-
-    # --- INIT ViT MODEL ONCE ---
     vit_model = None
     if args.genmo and Extractor is not None:
         print("Initializing ViT Extractor (HMR2)...")
         extractor_wrapper = Extractor(tqdm_leave=False)
         vit_model = extractor_wrapper.extractor
         vit_model.eval()
         vit_model.to(device)

     override_betas10 = load_betas10_from_npz(args.shape_npz, key="betas")
     temp_ann_dir = os.path.join(args.output, "vitpose", "temp_annotations")
     os.makedirs(temp_ann_dir, exist_ok=True)
     jsonl_files = sorted(glob(os.path.join(args.input, "sequence_*.jsonl")))
     global_J_reg = None
     j_reg_path = "third_party/GVHMR/inputs/checkpoints/body_models/smpl_neutral_J_regressor.pt"
     if os.path.exists(j_reg_path) and device == "cuda":
         global_J_reg = torch.load(j_reg_path, map_location=device)

     for jsonl_idx, jsonl_path in enumerate(jsonl_files):
         seq_name = os.path.splitext(os.path.basename(jsonl_path))[0].replace("sequence_", "")
         print(f"[{jsonl_idx+1}/{len(jsonl_files)}] Processing {seq_name}...")
         prof = {"smpl_batch": 0.0, "video_read": 0.0, "overlay": 0.0, "vit_process": 0.0,
                 "sparse_write": 0.0, "loop_total": 0.0, "save_files": 0.0, "debug_rend": 0.0, "prep": 0.0}

         t_start_seq = time.perf_counter()
         jsonl_dir = os.path.dirname(os.path.abspath(jsonl_path))
         video_path = os.path.join(jsonl_dir, f"video_{seq_name}.mp4")
         if not os.path.exists(video_path): video_path = os.path.join(jsonl_dir, "video.mp4")

-        # SPARSE WRITING FOLDER
         out_img_folder = os.path.join(args.output, "images", seq_name)
         os.makedirs(out_img_folder, exist_ok=True)
-        # Clean existing only if needed, usually we overwrite
-        # for p in glob(os.path.join(out_img_folder, "img_*.jpg")): try: os.remove(p) except: pass

         with open(jsonl_path, "r") as f: lines = f.readlines()
         lines = lines[1:] if len(lines) > 0 else []
         num_frames = len(lines)
         if num_frames <= 0: continue

         genmo_out = os.path.join(args.output, "genmo_features", f"{seq_name}.pt")
         smplx_out = os.path.join(args.output, "smplx_incam", f"{seq_name}_smplx.npz")
         smplx_global_out = os.path.join(args.output, "smplx_global", f"{seq_name}_global.npz")
         dpvo_dir = os.path.join(args.output, "dpvo", seq_name)
         for p in [genmo_out, smplx_out, smplx_global_out, dpvo_dir]:
             if p: os.makedirs(os.path.dirname(p), exist_ok=True)

         selected_set = set()
         if args.vitpose:
             if args.vitpose_use_all_frames: selected_indices = list(range(num_frames))
             else:
                 selected_indices = _compute_vitpose_selected_indices(
                     num_frames, FPS, args.vitpose_bucket_seconds,
                     args.vitpose_frames_per_bucket, args.vitpose_sampling, args.vitpose_seed
                 )
             selected_set = set(selected_indices)

         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened(): continue
         W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         resolved_ui_dir = args.ui_dir if args.ui_dir else _find_ui_dir()
         chat_font_path = _find_font_path(resolved_ui_dir)
         seq_seed = int(zlib.crc32(seq_name.encode("utf-8")) & 0xFFFFFFFF)
         chat_aug = SimpleChatOverlay(W, H, seed=seq_seed, num_lines=7, font_path=chat_font_path)
-        ui_aug = SimpleUIOverlay(W, H, seed=((seq_seed ^ 0xA5A5A5A5) & 0xFFFFFFFF), ui_dir=resolved_ui_dir,
                                  max_images=args.ui_max_images, show_prob=args.ui_show_prob)

         # --- BATCH SMPL (GPU) ---
         t0_smpl = time.perf_counter()
         smpl_precalc_data = []
         debug_global_verts_cpu = []
         parsed_rows = []
         for line in lines:
             row = json.loads(line)
             parsed_rows.append(parse_smpl_inputs_from_row(row, override_betas10, args.keep_unity_scale, args.transl_source, args.transl_y_offset_m))
         all_betas = np.stack([d['betas'] for d in parsed_rows])
         all_go = np.stack([d['global_orient'] for d in parsed_rows])
         all_bp = np.stack([d['body_pose'] for d in parsed_rows])
         all_pelvis0 = batch_smpl_forward(all_betas, all_go, all_bp, device=device)
         C = np.diag([1.0, -1.0, 1.0]).astype(np.float64)
-        world_fix_R, world_fix_R4, world_fix_R4_inv = None, None, None
         all_go_w, all_pelvis_pos_w_cv = [], []
         for i, d in enumerate(parsed_rows):
             R_cam_w_unity = R.from_quat(d['cam_rot_w_quat']).as_matrix()
-            R_cam_w_cv = C @ R_cam_w_unity @ C
             R_pelvis_c_cv = R.from_rotvec(d['global_orient'].astype(np.float64)).as_matrix()
-            R_pelvis_w_cv_raw = R_cam_w_cv @ R_pelvis_c_cv
-            if i == 0:
-                go_w0 = R.from_matrix(R_pelvis_w_cv_raw).as_rotvec().astype(np.float32)
-                model_gpu = _get_smplx_model(device)
-                with torch.no_grad():
-                    out_fix = model_gpu(betas=torch.from_numpy(d['betas'][None]).float().to(device),
-                                        global_orient=torch.from_numpy(go_w0[None]).float().to(device),
-                                        body_pose=torch.from_numpy(d['body_pose'][None]).float().to(device))
-                joints = out_fix.joints[0].detach().cpu().numpy().astype(np.float64)
-                pelvis_y = float(joints[0, 1])
-                head_y = float(joints[15, 1]) if joints.shape[0] > 15 else pelvis_y
-                if head_y < pelvis_y: world_fix_R = R.from_euler("x", 180, degrees=True).as_matrix().astype(np.float64)
-                else: world_fix_R = np.eye(3, dtype=np.float64)
-                world_fix_R4 = np.eye(4, dtype=np.float64)
-                world_fix_R4[:3, :3] = world_fix_R
-                world_fix_R4_inv = np.eye(4, dtype=np.float64); world_fix_R4_inv[:3, :3] = world_fix_R.T
-
-            R_pelvis_w_cv = world_fix_R @ R_pelvis_w_cv_raw
             all_go_w.append(R.from_matrix(R_pelvis_w_cv).as_rotvec().astype(np.float32))

             pelvis_pos_w_unity = d['pelvis_pos_world']
             root_pos_w_unity = (R_cam_w_unity @ d['root_cam_unity'] + d['cam_pos_world']).reshape(3)
             smpl_scale = d['smpl_scale']
             transl_source_local = str(args.transl_source).strip().lower()
             if transl_source_local == "root": target_pos_w_unity = root_pos_w_unity
             else:
                 if bool(args.keep_unity_scale): target_pos_w_unity = pelvis_pos_w_unity
                 else:
                     if abs(smpl_scale) > 1e-8: target_pos_w_unity = root_pos_w_unity + (pelvis_pos_w_unity - root_pos_w_unity) / smpl_scale
                     else: target_pos_w_unity = pelvis_pos_w_unity
-            pelvis_pos_w_cv = (C @ target_pos_w_unity).astype(np.float64)
-            pelvis_pos_w_cv = (world_fix_R @ pelvis_pos_w_cv.reshape(3, 1)).reshape(3)
-            all_pelvis_pos_w_cv.append(pelvis_pos_w_cv)

-        all_go_w = np.stack(all_go_w)
-        all_pelvis0_w = batch_smpl_forward(all_betas, all_go_w, all_bp, device=device)

-        for i in range(num_frames):
-            d = parsed_rows[i]
-            transl_c = (d['target_cam_cv'] - all_pelvis0[i]).astype(np.float32)
-            if str(args.transl_source) == "root": transl_w = all_pelvis_pos_w_cv[i].astype(np.float32)
             else: transl_w = (all_pelvis_pos_w_cv[i] - all_pelvis0_w[i]).astype(np.float32)
             smpl_precalc_data.append({
                 "go_c": d['global_orient'], "bp": d['body_pose'], "beta": d['betas'], "tr_c": transl_c,
-                "go_w": all_go_w[i], "tr_w": transl_w, "world_fix_R4": world_fix_R4, "world_fix_R4_inv": world_fix_R4_inv
             })
         prof["smpl_batch"] = time.perf_counter() - t0_smpl
         t0_gap = time.perf_counter()
         smpl_renderer = None
         vid_incam, vid_global = None, None
         debug_end_frame = min(num_frames, DEBUG_NUM_FRAMES)
         if args.debug:
             os.makedirs(os.path.join(args.output, "debug_renders"), exist_ok=True)
             if debug_end_frame > 0:
                 try:
-                    # REUSE parsed_rows!
                     K4_init = np.asarray(json.loads(lines[0])["cam_intrinsics"], dtype=np.float32)
                     smpl_renderer = SmplIncamRenderer(W, H, K4_init, device=device)
                     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                     vid_incam = cv2.VideoWriter(os.path.join(args.output, "debug_renders", f"{seq_name}_incam.mp4"), fourcc, FPS, (W, H))
-                    vid_global = cv2.VideoWriter(os.path.join(args.output, "debug_renders", f"{seq_name}_global.mp4"), fourcc, FPS, (512, 512))
                 except: pass

         # --- MAIN LOOP ---
-        coco_subset = []
-        img_paths = []
-        cam_intrinsics = []
         cam_T_wc_cv_all, cam_T_w2c_cv_all = [], []
         dpvo_poses, dpvo_intrinsics = [], []
-        bboxes, bbx_xys_all, kp2d_all, K_fullimg_all = [], [], [], []
         global_orient_c_all, transl_c_all, body_pose_all, betas_all = [], [], [], []
         global_orient_w_all, transl_w_all = [], []
-        C4 = np.diag([1.0, -1.0, 1.0, 1.0]).astype(np.float64)

-        # ViT Batching Lists
-        vit_img_batch = []
-        all_vit_features = []

         ret, _ = cap.read()  # skip 0
         prof["prep"] = time.perf_counter() - t0_gap

         t_start_loop = time.perf_counter()
         for idx in tqdm(range(num_frames), desc="Frames", leave=False):
             t0_read = time.perf_counter()
             ret, img_bgr = cap.read()
             prof["video_read"] += (time.perf_counter() - t0_read)
             if not ret: break
             img_filename = f"img_{idx:05d}.jpg"
             img_abs_path = os.path.join(out_img_folder, img_filename)
-            # --- OVERLAY ---
             t0_ov = time.perf_counter()
             chat_aug.maybe_append(idx)
             chat_aug.draw(img_bgr)
             ui_aug.draw(img_bgr)
             prof["overlay"] += (time.perf_counter() - t0_ov)

-            # --- METADATA ---
-            row = json.loads(lines[idx])
             K4 = np.asarray(row["cam_intrinsics"], dtype=np.float32)
             kpts_raw = np.asarray(row["kpts_2d"], dtype=np.float32).reshape(-1, 2)[:17]
             vis_raw = np.asarray(row["kpts_vis"], dtype=np.int32)[:17]
-            if vis_raw.shape[0] >= 5: vis_raw[3] = 1; vis_raw[4] = 1
             bbox = clamp_bbox_xywh_to_image(row["bbox"], W, H)
             sd = smpl_precalc_data[idx]
             global_orient_c_all.append(sd['go_c'])
             transl_c_all.append(sd['tr_c'])
             global_orient_w_all.append(sd['go_w'])
             transl_w_all.append(sd['tr_w'])
             body_pose_all.append(sd['bp'])
             betas_all.append(sd['beta'])
             bboxes.append(np.asarray(bbox, dtype=np.float32))
             bbx_xys_all.append(bbox_xywh_to_bbx_xys(bbox))
             kp2d_all.append(np.concatenate([kpts_raw, (vis_raw > 0).astype(np.float32)[:, None]], axis=1))
             K_fullimg_all.append(k4_to_K3(K4))

             img_rel = os.path.join("images", seq_name, img_filename).replace("\\", "/")
             img_paths.append(img_rel)
-            p_w, q_w = np.asarray(row["cam_pos_world"], dtype=np.float32), np.asarray(row["cam_rot_world"], dtype=np.float32)
             cam_T_wc = build_T_wc(p_w, q_w)
-            T_cw = np.linalg.inv(cam_T_wc)
-
-            cam_T_wc_cv = (C4 @ cam_T_wc @ C4).astype(np.float32)
-            cam_T_w2c_cv = (C4 @ T_cw @ C4).astype(np.float32)
-            if sd['world_fix_R4'] is not None:
-                cam_T_wc_cv = (sd['world_fix_R4'] @ cam_T_wc_cv.astype(np.float64)).astype(np.float32)
-                cam_T_w2c_cv = (cam_T_w2c_cv.astype(np.float64) @ sd['world_fix_R4_inv']).astype(np.float32)
             cam_T_wc_cv_all.append(cam_T_wc_cv)
             cam_T_w2c_cv_all.append(cam_T_w2c_cv)
             dpvo_poses.append(f"{p_w[0]} {p_w[1]} {p_w[2]} {q_w[0]} {q_w[1]} {q_w[2]} {q_w[3]}")
             dpvo_intrinsics.append(K4.astype(np.float32))

-            # --- BRANCH A: Genmo (ViT Extraction) ---
             if args.genmo and vit_model is not None:
                 t0_vit = time.perf_counter()
-                # Process image in RAM (Crop/Resize/Norm)
-                img_tensor = _process_image_memory(img_bgr, bbox, img_size=256)  # Returns CHW numpy
                 vit_img_batch.append(img_tensor)

                 if len(vit_img_batch) >= args.vit_batch_size:
                     batch_np = np.stack(vit_img_batch)
                     batch_t = torch.from_numpy(batch_np).to(device, non_blocking=True)
                     with torch.inference_mode():
                         with torch.amp.autocast("cuda"):
                             feats = vit_model({"img": batch_t})
                     all_vit_features.append(feats.detach().cpu())
                     vit_img_batch = []
                 prof["vit_process"] += (time.perf_counter() - t0_vit)

-            # --- BRANCH B: VitPose (Sparse Write) ---
             if args.vitpose and (idx in selected_set):
                 t0_wr = time.perf_counter()
-                # Use faster write if possible, 90 quality
                 cv2.imwrite(img_abs_path, img_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
                 kpts_coco = []
                 for k in range(17): kpts_coco.extend([float(kpts_raw[k, 0]), float(kpts_raw[k, 1]), int(vis_raw[k])])
                 coco_subset.append(({"file_name": img_rel, "width": W, "height": H},
                                     {"category_id": 1, "bbox": bbox, "area": float(bbox[2]*bbox[3]), "iscrowd": 0, "keypoints": kpts_coco, "num_keypoints": int(np.sum(vis_raw > 0))}))
                 prof["sparse_write"] += (time.perf_counter() - t0_wr)

-            # --- BRANCH C: Debug Video ---
             if args.debug and idx < debug_end_frame and smpl_renderer:
                 t0_dbg = time.perf_counter()
                 dbg = img_bgr.copy()
                 try: draw_bbox_xywh_and_center(dbg, bbox)
                 except: pass
                 try:
                     rgb = smpl_renderer.render(dbg[:, :, ::-1].copy(), sd['go_c'], sd['bp'], sd['beta'], sd['tr_c'], K4[:2], K4[2:])
                     dbg = rgb[:, :, ::-1].copy()
                 except: pass
                 if not args.debug_no_coco:
                     draw_vis_text_and_points(dbg, kpts_raw, vis_raw)
                 if vid_incam: vid_incam.write(dbg)
                 if vid_global:
                     verts_w = smpl_renderer.get_verts(sd['go_w'], sd['bp'], sd['beta'], sd['tr_w']).float()
                     debug_global_verts_cpu.append(verts_w.detach().cpu())
                 prof["debug_rend"] += (time.perf_counter() - t0_dbg)

-        # Flush remaining ViT batch
         if args.genmo and len(vit_img_batch) > 0 and vit_model is not None:
             t0_vit = time.perf_counter()
             batch_np = np.stack(vit_img_batch)
             batch_t = torch.from_numpy(batch_np).to(device, non_blocking=True)
             with torch.inference_mode():
                 with torch.amp.autocast("cuda"):
                     feats = vit_model({"img": batch_t})
             all_vit_features.append(feats.detach().cpu())
             prof["vit_process"] += (time.perf_counter() - t0_vit)

         prof["loop_total"] = time.perf_counter() - t_start_loop
         cap.release()
         if vid_incam: vid_incam.release()
-        # --- GLOBAL RENDER POST-LOOP ---
         t0_dbg = time.perf_counter()
         if vid_global and len(debug_global_verts_cpu) > 0:
             try:
-                from hmr4d.utils.vis.renderer import Renderer, get_global_cameras_static, get_ground_params_from_points
                 from hmr4d.utils.geo.hmr_cam import create_camera_sensor
-                _, _, K_global = create_camera_sensor(512, 512, 24)
-                global_renderer = Renderer(512, 512, device=device, faces=smpl_renderer.faces, K=K_global.to(device), bin_size=0)
                 verts_seq = torch.stack(debug_global_verts_cpu, dim=0)
                 off = verts_seq[0].mean(0); off[1] = verts_seq[0, :, 1].min()
                 verts_seq = verts_seq - off
-                g_R, g_T, g_L = get_global_cameras_static(verts_seq, beta=2.0, cam_height_degree=20, target_center_height=1.0, device=device)
                 if global_J_reg is not None and verts_seq.shape[1] == global_J_reg.shape[-1]:
-                    roots = torch.einsum("jv,fvk->fjk", global_J_reg.cpu(), verts_seq)[:, 0]
-                else: roots = verts_seq.mean(1)
                 sc, cx, cz = get_ground_params_from_points(roots, verts_seq)
                 global_renderer.set_ground(sc * 1.5, cx, cz)
                 col = torch.tensor([[0.0, 1.0, 0.0]], device=device)
                 for i in range(len(verts_seq)):
                     cam = global_renderer.create_camera(g_R[i], g_T[i])
                     img = global_renderer.render_with_ground(verts_seq[i].to(device)[None], col, cam, g_L)
-                    vid_global.write(img[:, :, ::-1].copy())
             except: pass
             vid_global.release()
         prof["debug_rend"] += (time.perf_counter() - t0_dbg)
         t0_save = time.perf_counter()
         if args.genmo:
             trans_w = np.stack(transl_w_all).astype(np.float32)
             world_off = trans_w[0].copy(); world_off[1] -= float(args.world_y_offset_m)
             trans_w_centered = trans_w - world_off[None]
             mats_w2c = np.stack(cam_T_w2c_cv_all).astype(np.float32)
             mats_wc = np.stack(cam_T_wc_cv_all).astype(np.float32)
             T_wp_w = np.eye(4, dtype=np.float32); T_wp_w[:3, 3] = world_off
             T_w_wp = np.eye(4, dtype=np.float32); T_w_wp[:3, 3] = -world_off
             mats_w2c_c = np.matmul(mats_w2c, T_wp_w[None])
             mats_wc_c = np.matmul(T_w_wp[None], mats_wc)
             cam_av, cam_tv = compute_velocity(mats_wc_c, fps=FPS)
-            # CONCAT FEATURES
             f_imgseq = torch.cat(all_vit_features, dim=0).float() if all_vit_features else torch.empty(0)

             g_dict = {
                 "smpl_params_c": {"global_orient": torch.from_numpy(np.stack(global_orient_c_all)), "body_pose": torch.from_numpy(np.stack(body_pose_all)), "transl": torch.from_numpy(np.stack(transl_c_all)), "betas": torch.from_numpy(np.stack(betas_all))},
                 "smpl_params_w": {"global_orient": torch.from_numpy(np.stack(global_orient_w_all)), "body_pose": torch.from_numpy(np.stack(body_pose_all)), "transl": torch.from_numpy(trans_w_centered), "betas": torch.from_numpy(np.stack(betas_all))},
                 "T_w2c": torch.from_numpy(mats_w2c_c), "K_fullimg": torch.from_numpy(np.stack(K_fullimg_all)),
                 "kp2d": torch.from_numpy(np.stack(kp2d_all)), "bbx_xys": torch.from_numpy(np.stack(bbx_xys_all)),
                 "cam_angvel": torch.from_numpy(cam_av), "cam_tvel": torch.from_numpy(cam_tv),
                 "imgname": img_paths, "valid_mask": torch.ones(len(img_paths), dtype=torch.float32),
                 "world_offset": torch.from_numpy(world_off.astype(np.float32)),
-                "f_imgseq": f_imgseq  # <--- ADDED FEATURES HERE
             }
             torch.save(g_dict, genmo_out)

         if args.smplx:
             poses66 = np.concatenate([np.stack(global_orient_w_all), np.stack(body_pose_all)], axis=1)
             poses165 = np.pad(poses66, ((0,0),(0,99)), mode="constant").astype(np.float32)
             trans_w = np.stack(transl_w_all).astype(np.float32)
             world_off = trans_w[0].copy(); world_off[1] -= float(args.world_y_offset_m)
             trans_w = trans_w - world_off[None]
             np.savez(smplx_global_out, mocap_framerate=int(FPS), gender="neutral", betas=betas_all[0], trans=trans_w, poses=poses165, world_offset=world_off)

         if args.vitpose and coco_subset:
             with open(os.path.join(temp_ann_dir, f"{seq_name}.json"), "w") as f: json.dump(coco_subset, f)
         prof["save_files"] = time.perf_counter() - t0_save
         total_t = time.perf_counter() - t_start_seq
         print(f"  > Done in {total_t:.2f}s | FPS: {num_frames/total_t:.1f}")
         print(f"    [Breakdown] BatchPrep: {prof['smpl_batch']:.2f}s | Init/Gap: {prof['prep']:.2f}s | Read: {prof['video_read']:.2f}s")
         print(f"    Overlay: {prof['overlay']:.2f}s | SparseWrite: {prof['sparse_write']:.2f}s | ViT: {prof['vit_process']:.2f}s")
         print(f"    DbgRend: {prof['debug_rend']:.2f}s | SaveFiles: {prof['save_files']:.2f}s")

     print("All sequences processed.")

 if __name__ == "__main__":
-    main()
 
 
1
  import sys
2
+
3
  import os
4
+
5
  import json
6
+
7
  import argparse
8
+
9
  import numpy as np
10
+
11
  import zlib
12
+
13
  from glob import glob
14
+
15
  from tqdm import tqdm
16
+
17
  import cv2
18
+
19
  import torch
20
+
21
  from scipy.spatial.transform import Rotation as R
22
+
23
  import time
24
+
25
  import shutil
26
+
27
  from pathlib import Path
28
 
29
+
30
+
31
  # --- SETUP PATHS FOR IMPORTS ---
32
+
33
+ REPO_ROOT = Path(__file__).resolve().parents[2]
34
+
35
  if str(REPO_ROOT) not in sys.path:
36
+
37
  sys.path.insert(0, str(REPO_ROOT))
38
+
39
+
40
+
41
+ from hmr4d.utils.preproc.vitfeat_extractor import Extractor
42
+
43
+ from hmr4d.utils.pylogger import Log
44
+
45
+
46
+
47
+ # Force single thread
48
+
 
 
 
 
49
  os.environ["OMP_NUM_THREADS"] = "1"
50
+
51
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
52
+
53
  cv2.setNumThreads(0)
54
+
55
  torch.set_num_threads(1)
56
 
57
+
58
+
59
  FPS = 30.0
60
+
61
+ DEBUG_NUM_FRAMES = 5
62
+
63
  IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
64
+
65
  IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
66
 
67
+
68
+
69
+ # --- HELPER FUNCTIONS (No Changes) ---
70
 
71
  def _process_image_memory(img_bgr, bbox_xywh, img_size=256):
72
+
73
+ if img_bgr is None: return np.zeros((3, img_size, img_size), dtype=np.float32)
74
 
75
  x, y, w, h = bbox_xywh
76
+
77
  cx, cy = x + w/2, y + h/2
78
+
79
+ scale = max(w, h) * 1.2
80
+
81
  H, W = img_bgr.shape[:2]
82
+
83
  max_side = float(max(H, W, 1))
84
 
85
+ if scale <= 1.0 or scale > max_side * 20.0: scale = max_side * 0.5
86
+
 
 
87
  half = scale / 2.0
88
+
89
  x0, y0 = int(cx - half), int(cy - half)
90
+
91
  x1, y1 = int(cx + half), int(cy + half)
92
 
93
  pad_l, pad_t = max(0, -x0), max(0, -y0)
94
+
95
  pad_r, pad_b = max(0, x1 - W), max(0, y1 - H)
96
 
97
+ if max(pad_l, pad_t, pad_r, pad_b) > int(max_side * 4.0): return np.zeros((3, img_size, img_size), dtype=np.float32)
 
 
98
 
99
  if pad_l or pad_t or pad_r or pad_b:
100
+
101
  img_bgr = cv2.copyMakeBorder(img_bgr, pad_t, pad_b, pad_l, pad_r, cv2.BORDER_CONSTANT, value=(0,0,0))
102
+
103
  x0 += pad_l; y0 += pad_t; x1 += pad_l; y1 += pad_t
104
 
105
  crop = img_bgr[y0:y1, x0:x1]
106
+
107
+ if crop.size == 0: return np.zeros((3, img_size, img_size), dtype=np.float32)
108
+
 
109
  if crop.shape[0] != img_size or crop.shape[1] != img_size:
110
+
111
  crop = cv2.resize(crop, (img_size, img_size), interpolation=cv2.INTER_LINEAR)
112
 
113
+ crop = crop[:, :, ::-1].astype(np.float32) / 255.0
114
+
115
  crop = (crop - IMAGENET_MEAN) / IMAGENET_STD
116
+
117
+ return crop.transpose(2, 0, 1)
118
+
119
+
120
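
A minimal usage sketch for _process_image_memory, with purely illustrative values (the frame and box below are hypothetical; the helper itself is defined above):

    import numpy as np

    frame = np.zeros((720, 1280, 3), dtype=np.uint8)   # stand-in BGR frame
    bbox_xywh = (500.0, 120.0, 180.0, 420.0)           # x, y, w, h in pixels
    chw = _process_image_memory(frame, bbox_xywh, img_size=256)
    assert chw.shape == (3, 256, 256)                  # CHW float32, ImageNet-normalized
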
 
121
  def _alpha_blend_bgra_onto_bgr(dst_bgr, src_bgra, x, y):
122
+
123
  if dst_bgr is None or src_bgra is None: return dst_bgr
124
+
125
  H, W = dst_bgr.shape[:2]
126
+
127
  h, w = src_bgra.shape[:2]
128
+
129
  if w <= 0 or h <= 0: return dst_bgr
130
+
131
  x0, y0 = max(int(x), 0), max(int(y), 0)
132
+
133
  x1, y1 = min(int(x + w), W), min(int(y + h), H)
134
+
135
  if x1 <= x0 or y1 <= y0: return dst_bgr
136
+
137
  roi = dst_bgr[y0:y1, x0:x1]
138
+
139
  src_crop = src_bgra[(y0 - int(y)):(y0 - int(y)) + (y1 - y0), (x0 - int(x)):(x0 - int(x)) + (x1 - x0)]
140
+
141
  if src_crop.shape[2] == 3:
142
+
143
  roi[:] = src_crop
144
+
145
  return dst_bgr
146
 
147
  alpha = src_crop[:, :, 3].astype(np.uint16)
148
+
149
  inv_alpha = 255 - alpha
150
+
151
  b_src, g_src, r_src = src_crop[:, :, 0], src_crop[:, :, 1], src_crop[:, :, 2]
152
+
153
  b_dst, g_dst, r_dst = roi[:, :, 0], roi[:, :, 1], roi[:, :, 2]
154
 
155
  roi[:, :, 0] = ((b_src * alpha + b_dst * inv_alpha) >> 8).astype(np.uint8)
156
+
157
  roi[:, :, 1] = ((g_src * alpha + g_dst * inv_alpha) >> 8).astype(np.uint8)
158
+
159
  roi[:, :, 2] = ((r_src * alpha + r_dst * inv_alpha) >> 8).astype(np.uint8)
160
+
161
  return dst_bgr
162
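
The `>> 8` above trades exactness for speed: it divides by 256 instead of 255, darkening blends by at most one intensity level. A quick check with plain integers:

    src, dst, alpha = 200, 50, 255                      # illustrative 8-bit values
    exact = (src * alpha + dst * (255 - alpha)) // 255  # 200
    fast = (src * alpha + dst * (255 - alpha)) >> 8     # 199
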
 
163
+
164
+
165
  def _find_ui_dir():
166
+
167
  cand = os.path.join(os.getcwd(), "UI")
168
+
169
  if os.path.isdir(cand): return cand
170
+
171
+ return None # Simplified for brevity
172
+
173
+
174
 
175
  def _find_font_path(ui_dir, filename="Inter_18pt-Bold.ttf"):
176
+
177
  if not ui_dir: return None
178
+
179
  p = os.path.join(ui_dir, filename)
180
+
181
  return p if os.path.isfile(p) else None
182
 
183
+
184
+
185
  def _load_ui_images(ui_dir):
186
+
187
  if not ui_dir or (not os.path.isdir(ui_dir)): return []
188
+
189
  imgs = []
190
+
191
  for name in sorted(os.listdir(ui_dir)):
192
+
193
  p = os.path.join(ui_dir, name)
194
+
195
  if not os.path.isfile(p): continue
196
+
197
  if name.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
198
+
199
  im = cv2.imread(p, cv2.IMREAD_UNCHANGED)
200
+
201
  if im is not None:
202
+
203
  if im.ndim == 2: im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
204
+
205
  imgs.append(im)
206
+
207
  return imgs
208
 
209
+
210
+
211
  class SimpleUIOverlay:
212
+
213
  def __init__(self, width, height, seed=0, ui_dir=None, max_images=4, show_prob=0.6, min_hold_frames=20, max_hold_frames=120):
214
+
215
  self.W, self.H = int(width), int(height)
216
+
217
  self.rng = np.random.default_rng(int(seed))
218
+
219
  self.max_images = max(0, int(max_images))
220
+
221
  self.show_prob = float(show_prob)
222
+
223
  self.min_hold_frames, self.max_hold_frames = max(1, int(min_hold_frames)), max(1, int(max_hold_frames))
224
+
225
  self.ui_dir = ui_dir if ui_dir else _find_ui_dir()
226
+
227
  self.assets = _load_ui_images(self.ui_dir)
228
+
229
  self._ttl, self._active = 0, []
230
 
231
  def _pick_new_state(self):
232
+
233
  self._ttl = int(self.rng.integers(self.min_hold_frames, self.max_hold_frames + 1))
234
+
235
  self._active = []
236
+
237
  if (not self.assets) or (self.max_images <= 0): return
238
+
239
  if float(self.rng.random()) > self.show_prob: return
240
+
241
  k = min(int(self.rng.integers(1, self.max_images + 1)), len(self.assets))
242
+
243
  idxs = self.rng.choice(len(self.assets), size=k, replace=False)
244
+
245
  for idx in idxs:
246
+
247
  im = self.assets[int(idx)]
248
+
249
  h, w = im.shape[:2]
250
+
251
  if w > 0 and h > 0:
252
+
253
  x = int(self.rng.integers(-w // 4, max(1, self.W - (3 * w // 4))))
254
+
255
  y = int(self.rng.integers(-h // 4, max(1, self.H - (3 * h // 4))))
256
+
257
  self._active.append((im, x, y))
258
 
259
  def draw(self, img_bgr):
260
+
261
  if img_bgr is None: return img_bgr
262
+
263
  if self._ttl <= 0: self._pick_new_state()
264
+
265
  self._ttl -= 1
266
+
267
+ for im, x, y in self._active: _alpha_blend_bgra_onto_bgr(img_bgr, im, x, y)
268
+
269
  return img_bgr
270
 
271
+
272
+
273
  class SimpleChatOverlay:
274
+
275
  def __init__(self, width, height, seed=0, num_lines=7, region_w=420, region_h=180, margin=18, every_n_frames=15, corner=None, font_path=None):
276
+
277
  from collections import deque
278
+
279
  self.W, self.H = int(width), int(height)
280
+
281
  self.rng = np.random.default_rng(int(seed))
282
+
283
  self.num_lines, self.margin, self.every_n_frames = int(num_lines), int(margin), max(1, int(every_n_frames))
284
+
285
  self.region_w, self.region_h = int(region_w), int(region_h)
286
+
287
  self.font_path = font_path
288
+
289
  self._pil_fonts = {}
290
+
291
  self.corner = str(corner) if corner else str(self.rng.choice(["tl", "tr", "bl", "br"]))
292
+
293
  self.messages = deque(maxlen=self.num_lines)
294
+
295
  for _ in range(self.num_lines): self.messages.append(self._random_message())
296
+
297
  self._cached_overlay, self._dirty = None, True
298
 
299
  def _random_message(self):
300
+
301
  user = str(self.rng.choice(["nightbot", "viewer", "catjam", "shadow", "speedrunner", "chattycathy", "kappaking"]))
302
+
303
  if self.rng.random() < 0.5: user += str(self.rng.integers(10, 999))
304
+
305
  text = str(self.rng.choice(["pog", "lol", "gg", "nice", "W", "L", "no shot", "crazy", "clip it", "cooking", "unlucky"]))
306
+
307
  color = tuple(int(x) for x in self.rng.choice([(255, 120, 0), (0, 180, 255), (255, 0, 180), (0, 255, 120)]))
308
+
309
  return {"user": user, "text": text, "color": color}
310
 
311
  def _get_pil_font(self, size_px):
312
+
313
  if not self.font_path: return None
314
+
315
  if size_px in self._pil_fonts: return self._pil_fonts[size_px]
316
+
317
  try:
318
+
319
  from PIL import ImageFont
320
+
321
+ font = ImageFont.truetype(self.font_path, size=max(1, size_px)); self._pil_fonts[size_px] = font; return font  # store in the cache so the size lookup above can actually hit
322
+
323
  except: return None
324
 
325
  def maybe_append(self, frame_idx):
326
+
327
  if int(frame_idx) % self.every_n_frames == 0:
328
+
329
  self.messages.append(self._random_message())
330
+
331
  self._dirty = True
332
 
333
  def _render_cache(self):
334
+
335
  rw = min(self.region_w, max(40, self.W - 2 * self.margin))
336
+
337
  rh = min(self.region_h, max(40, self.H - 2 * self.margin))
338
+
339
  pil_font = self._get_pil_font(int(round(np.clip(20.0 * (self.H / 720.0), 14.0, 30.0))))
340
+
341
  if pil_font is None:
342
+
343
+ self._cached_overlay = None; return
344
+
345
  try:
346
+
347
  from PIL import Image, ImageDraw
348
+
349
  pil = Image.new("RGBA", (rw, rh), (0, 0, 0, 0))
350
+
351
  draw = ImageDraw.Draw(pil)
352
+
353
+ line_h = max(14, int(round(float(getattr(pil_font, "size", 18)) * 1.25)))
354
+
355
  lines = list(self.messages)[-min(self.num_lines, max(1, rh // line_h)):]
356
+
357
  local_y = rh - line_h if self.corner in ("bl", "br") else 0
358
+
359
  for msg in lines:
360
+
361
  user = f"{msg['user']}: "
362
+
363
+ draw.text((0, local_y), user, font=pil_font, fill=tuple(msg['color'][::-1]))
364
+
365
  tw = draw.textlength(user, font=pil_font)
366
+
367
  draw.text((tw, local_y), msg['text'], font=pil_font, fill=(240, 240, 240))
368
+
369
  local_y += (-line_h if self.corner in ("bl", "br") else line_h)
370
+
371
  self._cached_overlay = cv2.cvtColor(np.asarray(pil), cv2.COLOR_RGBA2BGRA)
372
+
373
  except: self._cached_overlay = None
374
 
375
  def draw(self, img_bgr):
376
+
377
  if img_bgr is None: return img_bgr
378
+
379
+ if self._dirty: self._render_cache(); self._dirty = False
380
+
381
  if self._cached_overlay is not None:
382
+
383
  rw = min(self.region_w, max(40, self.W - 2 * self.margin))
384
+
385
  rh = min(self.region_h, max(40, self.H - 2 * self.margin))
386
+
387
  if self.corner == "tl": x, y = self.margin, self.margin
388
+
389
  elif self.corner == "tr": x, y = self.W - self.margin - rw, self.margin
390
+
391
  elif self.corner == "bl": x, y = self.margin, self.H - self.margin - rh
392
+
393
  else: x, y = self.W - self.margin - rw, self.H - self.margin - rh
394
+
395
  _alpha_blend_bgra_onto_bgr(img_bgr, self._cached_overlay, x, y)
396
+
397
  return img_bgr
398
 
399
+
400
+
401
+ def k4_to_K3(k4): return np.array([[k4[0], 0, k4[2]], [0, k4[1], k4[3]], [0, 0, 1]], dtype=np.float32)
402
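
k4_to_K3 assumes cam_intrinsics is ordered [fx, fy, cx, cy]; for example:

    K = k4_to_K3([600.0, 600.0, 320.0, 180.0])
    # -> [[600.,   0., 320.],
    #     [  0., 600., 180.],
    #     [  0.,   0.,   1.]]
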
 
403
  def bbox_xywh_to_bbx_xys(bbox_xywh, base_enlarge=1.0):
404
+
405
  x, y, w, h = [float(v) for v in bbox_xywh]
406
+
407
  return np.array([x + 0.5 * w, y + 0.5 * h, max(w, h) * float(base_enlarge)], dtype=np.float32)
408
 
409
  def clamp_bbox_xywh_to_image(bbox_xywh, W, H, min_size=1.0):
410
+
411
  x, y, w, h = [float(v) for v in bbox_xywh]
412
+
413
  W, H = float(W), float(H)
414
+
415
  if W <= 0 or H <= 0: return [0.0, 0.0, 0.0, 0.0]
416
+
417
  x2, y2 = x + w, y + h
418
+
419
  x1c = float(np.clip(x, 0.0, max(0.0, W - 1.0)))
420
+
421
  y1c = float(np.clip(y, 0.0, max(0.0, H - 1.0)))
422
+
423
  x2c = float(np.clip(x2, 0.0, W))
424
+
425
  y2c = float(np.clip(y2, 0.0, H))
426
+
427
  if x2c <= x1c: x2c = min(W, x1c + float(min_size))
428
+
429
  if y2c <= y1c: y2c = min(H, y1c + float(min_size))
430
+
431
  wc = max(0.0, x2c - x1c)
432
+
433
  hc = max(0.0, y2c - y1c)
434
+
435
  return [x1c, y1c, wc, hc]
436
 
437
  def draw_bbox_xywh_and_center(img_bgr, bbox_xywh, color=(255, 255, 0)):
438
+
439
  x, y, w, h = [float(v) for v in bbox_xywh]
440
+
441
  cv2.rectangle(img_bgr, (int(x), int(y)), (int(x+w), int(y+h)), color, 2)
442
+
443
  cv2.circle(img_bgr, (int(x+w/2), int(y+h/2)), 4, (0, 0, 255), -1)
444
 
445
  def vis_label_and_color(v: int):
446
+
447
  if v == 2: return "VIS", (0, 255, 0)
448
+
449
  if v == 1: return "OCC", (0, 165, 255)
450
+
451
  return "OFF", (160, 160, 160)
452
 
453
  def draw_vis_text_and_points(img_bgr, kpts2d_xy, vis17):
454
+
455
  for k in range(17):
456
+
457
  v = int(vis17[k])
458
+
459
  label, color = vis_label_and_color(v)
460
+
461
  x, y = int(round(kpts2d_xy[k, 0])), int(round(kpts2d_xy[k, 1]))
462
+
463
  if v > 0: cv2.circle(img_bgr, (x, y), 4, color, -1)
464
+
465
  cv2.putText(img_bgr, f"{k}:{label}", (x + 6, y - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1, cv2.LINE_AA)
466
 
467
  def build_T_wc(pos_world, quat_world_xyzw):
468
+
469
  T = np.eye(4, dtype=np.float64)
470
+
471
  T[:3, :3] = R.from_quat(np.asarray(quat_world_xyzw, dtype=np.float64)).as_matrix()
472
+
473
  T[:3, 3] = np.asarray(pos_world, dtype=np.float64)
474
+
475
  return T
476
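
A small sanity sketch for build_T_wc (values illustrative); scipy quaternions are xyzw, matching the quat_world_xyzw argument name:

    import numpy as np
    from scipy.spatial.transform import Rotation as R

    q_xyzw = R.from_euler("y", 90, degrees=True).as_quat()
    T = build_T_wc([1.0, 2.0, 3.0], q_xyzw)
    assert np.allclose(T[:3, 3], [1.0, 2.0, 3.0])            # translation in last column
    assert np.allclose(T[:3, :3] @ T[:3, :3].T, np.eye(3))   # rotation block is orthonormal
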
 
477
  def compute_velocity(mats, fps=30.0):
478
+
479
  N = len(mats)
480
+
481
  if N < 2: return np.zeros((N, 3), dtype=np.float32), np.zeros((N, 3), dtype=np.float32)
482
+
483
  R_curr = mats[:, :3, :3]
484
+
485
  R_diff = np.matmul(R_curr[1:], np.transpose(R_curr[:-1], (0, 2, 1)))
486
+
487
  rv = R.from_matrix(R_diff).as_rotvec()
488
+
489
  angvel = np.zeros((N, 3), dtype=np.float32)
490
+
491
  angvel[1:] = rv
492
+
493
  t_curr = mats[:, :3, 3]
494
+
495
  tvel = np.zeros((N, 3), dtype=np.float32)
496
+
497
  tvel[1:] = t_curr[1:] - t_curr[:-1]
498
+
499
  return angvel.astype(np.float32), tvel.astype(np.float32)
500
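
Worth noting: `fps` is accepted but never used in the body, so both outputs are per-frame deltas rather than per-second rates. An illustrative check with a camera yawing one degree per frame:

    import numpy as np
    from scipy.spatial.transform import Rotation as R

    N = 5
    mats = np.tile(np.eye(4), (N, 1, 1))
    for i in range(N):
        mats[i, :3, :3] = R.from_euler("y", i * 1.0, degrees=True).as_matrix()
    angvel, tvel = compute_velocity(mats, fps=30.0)
    print(np.degrees(np.linalg.norm(angvel[1:], axis=1)))    # ~[1. 1. 1. 1.]
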
 
501
+
502
+
503
  def _compute_vitpose_selected_indices(num_frames, fps, bucket_seconds, frames_per_bucket, sampling="uniform", seed=123):
504
+
505
  if num_frames <= 0: return []
506
+
507
  rng = np.random.default_rng(int(seed))
508
+
509
  selected = []
510
+
511
  bucket_len = max(1, int(round(float(bucket_seconds) * float(fps))))
512
+
513
  b_start = 0
514
+
515
  while b_start < num_frames:
516
+
517
  b_end = min(num_frames, b_start + bucket_len)
518
+
519
  k = min(int(frames_per_bucket), b_end - b_start)
520
+
521
  if k > 0:
522
+
523
+ if sampling == "random": idxs = np.sort(rng.choice(np.arange(b_start, b_end), size=k, replace=False)).tolist()
524
+
525
+ elif sampling == "linspace": idxs = sorted(list(set(np.linspace(b_start, b_end - 1, k, dtype=int).tolist())))
526
+
527
+ else:
528
+
529
  if k == 1: idxs = [b_start + (b_end - b_start) // 2]
530
+
531
+ else: step = (b_end - b_start) // k; idxs = [min(b_start + i * step, b_end - 1) for i in range(k)]
532
+
533
  selected.extend(idxs)
534
+
535
  b_start = b_end
536
+
537
  return sorted(list(set(selected)))
538
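
An illustrative call: 90 frames at 30 fps with 1-second buckets and 2 frames per bucket; the evenly strided "uniform" branch yields:

    idxs = _compute_vitpose_selected_indices(
        num_frames=90, fps=30.0, bucket_seconds=1.0,
        frames_per_bucket=2, sampling="uniform", seed=123,
    )
    # idxs == [0, 15, 30, 45, 60, 75]
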
 
539
+
540
+
541
  _SMPLX_MODEL = None
542
+
543
  _SMPLX_DEVICE = None
544
 
545
  def _get_smplx_model(device):
546
+
547
  global _SMPLX_MODEL, _SMPLX_DEVICE
548
+
549
+ if _SMPLX_MODEL is not None and _SMPLX_DEVICE == device: return _SMPLX_MODEL
550
+
551
  from hmr4d.utils.smplx_utils import make_smplx
552
+
553
  _SMPLX_MODEL = make_smplx("supermotion").to(device).eval()
554
+
555
  _SMPLX_DEVICE = device
556
+
557
  return _SMPLX_MODEL
558
 
559
+
560
+
561
  class SmplIncamRenderer:
562
+
563
  def __init__(self, width, height, K4, device="cuda", smplx2smpl_path="hmr4d/utils/body_model/smplx2smpl_sparse.pt"):
564
+
565
  from hmr4d.utils.smplx_utils import make_smplx
566
+
567
  from hmr4d.utils.vis.renderer import Renderer
568
+
569
  self.torch = torch
570
+
571
  self.device = device
572
+
573
  self.smplx = make_smplx("supermotion").to(device).eval()
574
+
575
+ self.smplx2smpl = None; self.faces = None
576
+
577
  try:
578
+
579
  self.smplx2smpl = torch.load(smplx2smpl_path).to(device)
580
+
581
  self.faces = make_smplx("smpl").faces
582
+
583
  except: self.faces = self.smplx.faces
584
+
585
  self.K_torch = torch.from_numpy(k4_to_K3(K4)).to(device)
586
+
587
  self.renderer = Renderer(width, height, device=device, faces=self.faces, K=self.K_torch)
588
 
589
  @torch.no_grad()
590
+
591
  def render(self, img_rgb_uint8, global_orient_aa, body_pose_aa, betas_10, transl_xyz, fl, pp):
592
+
593
  K3_torch = torch.from_numpy(np.array([[fl[0], 0, pp[0]], [0, fl[1], pp[1]], [0, 0, 1]], dtype=np.float32)).to(self.device)
594
+
595
  self.renderer.set_intrinsic(K3_torch)
596
+
597
+ params = { "global_orient": torch.from_numpy(global_orient_aa[None]).float().to(self.device), "body_pose": torch.from_numpy(body_pose_aa[None]).float().to(self.device), "betas": torch.from_numpy(betas_10[None]).float().to(self.device), "transl": torch.from_numpy(transl_xyz[None]).float().to(self.device), }
598
+
599
+ out = self.smplx(**params); verts = out.vertices[0]
600
+
601
  if self.smplx2smpl is not None and verts.dim() == 2: verts = torch.matmul(self.smplx2smpl, verts)
602
+
603
  img_out = self.renderer.render_mesh(verts, img_rgb_uint8, [0.8, 0.8, 0.8])
604
+
605
  return img_out
606
+
607
  @torch.no_grad()
608
+
609
  def get_verts(self, global_orient_aa, body_pose_aa, betas_10, transl_xyz):
610
+
611
+ params = { "global_orient": torch.from_numpy(global_orient_aa[None]).float().to(self.device), "body_pose": torch.from_numpy(body_pose_aa[None]).float().to(self.device), "betas": torch.from_numpy(betas_10[None]).float().to(self.device), "transl": torch.from_numpy(transl_xyz[None]).float().to(self.device), }
612
+
613
+ out = self.smplx(**params); verts = out.vertices[0]
614
+
615
+ if self.smplx2smpl is not None and verts.dim() == 2: verts = torch.matmul(self.smplx2smpl, verts)
616
+
617
  return verts
618
 
619
+
620
+
621
  def _as_betas10(betas_any) -> np.ndarray:
622
+
623
  betas = np.asarray(betas_any, dtype=np.float32).reshape(-1)
624
+
625
+ betas10 = np.zeros(10, dtype=np.float32); n = min(10, betas.size)
626
+
627
  if n > 0: betas10[:n] = betas[:n]
628
+
629
  return betas10
630
 
631
  def load_betas10_from_npz(npz_path, key="betas", index=None):
632
+
633
+ with np.load(npz_path, allow_pickle=True) as data: arr = data[key]
634
+
635
  if arr.ndim == 0: arr = np.asarray(arr).reshape(1)
636
+
637
  if arr.ndim == 1: betas = arr
638
+
639
+ elif arr.ndim == 2: row_idx = 0 if index is None else int(index); betas = arr[row_idx]
640
+
641
  else: raise ValueError(f"Bad betas shape: {arr.shape}")
642
+
643
  return _as_betas10(betas)
644
 
645
+ def _default_shape_npz_path() -> str: return os.path.join(os.path.dirname(__file__), "shape.npz")
646
+
647
+
648
 
649
  def parse_smpl_inputs_from_row(row, override_betas10=None, keep_unity_scale=False, transl_source="pelvis", transl_y_offset_m=0.0):
650
+
651
  C = np.diag([1.0, -1.0, 1.0]).astype(np.float64)
652
+
653
  cam_rot_w_quat = np.array(row["cam_rot_world"], dtype=np.float64)
654
+
655
  R_cam_w = R.from_quat(cam_rot_w_quat).as_matrix()
656
+
657
  pel_rot_w_quat = np.array(row["pelvis_rot_world"], dtype=np.float64)
658
+
659
  R_pel_w = R.from_quat(pel_rot_w_quat).as_matrix()
660
+
661
+
662
+
663
+ # Relative Rotation (Body to Camera)
664
+
665
  R_rel_unity = R_cam_w.T @ R_pel_w
666
+
667
  R_cv = C @ R_rel_unity @ C
668
+
669
+ R_final = R_cv @ R.from_euler("z", 180, degrees=True).as_matrix()
670
+
671
  global_orient_aa = R.from_matrix(R_final).as_rotvec().astype(np.float32)
672
 
673
+
674
+
675
  smpl_scale = float(row.get("smpl_root_world_scale", 1.0))
676
+
677
  pelvis_cam_unity = np.asarray(row["smpl_incam_transl"], dtype=np.float64).reshape(3)
678
+
679
  root_cam_unity = np.asarray(row.get("smpl_root_incam_transl", [0.0, 0.0, 0.0]), dtype=np.float64).reshape(3)
680
+
681
  pelvis_cam_unity = pelvis_cam_unity + np.array([0.0, float(transl_y_offset_m), 0.0], dtype=np.float64)
682
 
683
+
684
+
685
  if str(transl_source).strip().lower() == "root": target_cam_unity = root_cam_unity
686
+
687
  else:
688
+
689
  if bool(keep_unity_scale): target_cam_unity = pelvis_cam_unity
690
+
691
  else:
692
+
693
  if abs(smpl_scale) > 1e-8: target_cam_unity = root_cam_unity + (pelvis_cam_unity - root_cam_unity) / smpl_scale
694
+
695
  else: target_cam_unity = pelvis_cam_unity
696
+
697
  target_cam_cv = (C @ target_cam_unity).astype(np.float64)
698
 
699
+
700
+
701
  pose = np.asarray(row["smplx_pose"], dtype=np.float32)
702
+
703
  body_pose = pose[3:66].astype(np.float32)
704
+
705
  betas10 = _as_betas10(override_betas10)
706
+
707
+
708
+
709
  return {
710
+
711
  "global_orient": global_orient_aa, "body_pose": body_pose, "betas": betas10,
712
+
713
  "target_cam_cv": target_cam_cv, "cam_rot_w_quat": cam_rot_w_quat,
714
+
715
  "cam_pos_world": np.asarray(row["cam_pos_world"], dtype=np.float64).reshape(3),
716
+
717
  "pelvis_pos_world": np.asarray(row["pelvis_pos_world"], dtype=np.float64).reshape(3),
718
+
719
  "smpl_scale": smpl_scale, "root_cam_unity": root_cam_unity
720
+
721
  }
722
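
A sketch of the change of basis used above, as I read it (not a spec): C = diag(1, -1, 1) flips the Y axis to carry Unity's left-handed, Y-up frame into an OpenCV-style frame, so rotations convert by conjugation while points convert by a single multiplication:

    import numpy as np
    from scipy.spatial.transform import Rotation as R

    C = np.diag([1.0, -1.0, 1.0])
    R_unity = R.from_euler("y", 30, degrees=True).as_matrix()
    p_unity = np.array([0.5, 1.7, 2.0])
    R_cv, p_cv = C @ R_unity @ C, C @ p_unity
    # Rotate-then-convert equals convert-then-rotate, since C @ C = I:
    assert np.allclose(C @ (R_unity @ p_unity), R_cv @ p_cv)
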
 
723
+
724
+
725
  def batch_smpl_forward(betas, global_orient, body_pose, device):
726
+
727
  model = _get_smplx_model(device)
728
+
729
  N = len(betas)
730
+
731
+ chunk_size = 4096; pelvis_list = []
732
+
733
  with torch.no_grad():
734
+
735
  for i in range(0, N, chunk_size):
736
+
737
  b_betas = torch.from_numpy(betas[i:i+chunk_size]).float().to(device)
738
+
739
  b_go = torch.from_numpy(global_orient[i:i+chunk_size]).float().to(device)
740
+
741
  b_bp = torch.from_numpy(body_pose[i:i+chunk_size]).float().to(device)
742
+
743
  b_tr = torch.zeros((len(b_betas), 3), dtype=torch.float32, device=device)
744
+
745
  out = model(betas=b_betas, global_orient=b_go, body_pose=b_bp, transl=b_tr)
746
+
747
  pelvis_list.append(out.joints[:, 0, :].detach().cpu().numpy())
748
+
749
  return np.concatenate(pelvis_list, axis=0)
750
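
These zero-transl pelvis positions are consumed later as offsets: the SMPL(-X) pelvis lands at pelvis0(betas, pose) + transl, so placing it at a target point means setting transl = p_target - pelvis0. A hedged paraphrase of the precalc below, with illustrative numbers:

    import numpy as np

    p_target = np.array([0.10, 0.90, 3.20], dtype=np.float32)   # desired pelvis position
    pelvis0 = np.array([0.00, -0.35, 0.01], dtype=np.float32)   # pelvis at transl = 0
    transl = p_target - pelvis0
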
 
751
+
752
+
753
  def main():
754
+
755
  parser = argparse.ArgumentParser()
756
+
757
  parser.add_argument("--input", required=True)
758
+
759
  parser.add_argument("--output", required=True)
760
+
761
  parser.add_argument("--debug", action="store_true")
762
+
763
  parser.add_argument("--vitpose", action="store_true")
764
+
765
+ parser.add_argument("--genmo", action="store_true")
766
+
767
  parser.add_argument("--dpvo", action="store_true")
768
+
769
  parser.add_argument("--smplx", action="store_true")
770
+
771
  parser.add_argument("--debug_no_coco", action="store_true")
772
+
773
  parser.add_argument("--shape_npz", default=_default_shape_npz_path())
774
+
775
  parser.add_argument("--vitpose_use_all_frames", action="store_true")
776
+
777
  parser.add_argument("--vitpose_bucket_seconds", type=float, default=12.0)
778
+
779
  parser.add_argument("--vitpose_frames_per_bucket", type=int, default=36)
780
+
781
  parser.add_argument("--vitpose_sampling", type=str, default="random")
782
+
783
  parser.add_argument("--vitpose_seed", type=int, default=123)
784
+
785
  parser.add_argument("--ui_dir", type=str, default=None)
786
+
787
  parser.add_argument("--ui_show_prob", type=float, default=0.25)
788
+
789
  parser.add_argument("--ui_max_images", type=int, default=3)
790
+
791
  parser.add_argument("--ui_hold_min_s", type=float, default=0.7)
792
+
793
  parser.add_argument("--ui_hold_max_s", type=float, default=5.0)
794
+
795
  parser.add_argument("--ui_seed", type=int, default=None)
796
+
797
  parser.add_argument("--keep_unity_scale", action="store_true")
798
+
799
  parser.add_argument("--transl_source", type=str, default="pelvis")
800
+
801
  parser.add_argument("--transl_y_offset_m", type=float, default=-0.020)
802
+
803
  parser.add_argument("--world_y_offset_m", type=float, default=1.3415)
804
+
805
  parser.add_argument("--vit_batch_size", type=int, default=512, help="Batch size for in-memory ViT extraction")
806
+
807
  args = parser.parse_args()
808
 
809
+
810
+
811
  if not (args.vitpose or args.genmo or args.dpvo or args.smplx):
812
+
813
  args.vitpose = args.genmo = args.dpvo = args.smplx = True
814
 
815
+
816
+
817
  device = "cuda" if torch.cuda.is_available() else "cpu"
818
+
819
  print(f"Running STREAMING processing on {device.upper()}...")
820
+
821
+
822
+
823
  vit_model = None
824
+
825
  if args.genmo and Extractor is not None:
826
+
827
  print("Initializing ViT Extractor (HMR2)...")
828
+
829
  extractor_wrapper = Extractor(tqdm_leave=False)
830
+
831
  vit_model = extractor_wrapper.extractor
832
+
833
  vit_model.eval()
834
+
835
  vit_model.to(device)
836
 
837
+
838
+
839
  override_betas10 = load_betas10_from_npz(args.shape_npz, key="betas")
840
+
841
  temp_ann_dir = os.path.join(args.output, "vitpose", "temp_annotations")
842
+
843
  os.makedirs(temp_ann_dir, exist_ok=True)
844
+
845
  jsonl_files = sorted(glob(os.path.join(args.input, "sequence_*.jsonl")))
846
+
847
+
848
+
849
  global_J_reg = None
850
+
851
  j_reg_path = "third_party/GVHMR/inputs/checkpoints/body_models/smpl_neutral_J_regressor.pt"
852
+
853
  if os.path.exists(j_reg_path) and device == "cuda":
854
+
855
  global_J_reg = torch.load(j_reg_path, map_location=device)
856
 
857
+
858
+
859
  for jsonl_idx, jsonl_path in enumerate(jsonl_files):
860
+
861
  seq_name = os.path.splitext(os.path.basename(jsonl_path))[0].replace("sequence_", "")
862
+
863
  print(f"[{jsonl_idx+1}/{len(jsonl_files)}] Processing {seq_name}...")
864
+
865
+
866
+
867
  prof = {"smpl_batch": 0.0, "video_read": 0.0, "overlay": 0.0, "vit_process": 0.0,
868
+
869
  "sparse_write": 0.0, "loop_total": 0.0, "save_files": 0.0, "debug_rend": 0.0, "prep": 0.0}
870
 
871
+
872
+
873
  t_start_seq = time.perf_counter()
874
+
875
  jsonl_dir = os.path.dirname(os.path.abspath(jsonl_path))
876
+
877
  video_path = os.path.join(jsonl_dir, f"video_{seq_name}.mp4")
878
+
879
  if not os.path.exists(video_path): video_path = os.path.join(jsonl_dir, "video.mp4")
880
 
881
+
882
+
883
  out_img_folder = os.path.join(args.output, "images", seq_name)
884
+
885
  os.makedirs(out_img_folder, exist_ok=True)
886
+
887
+
888
 
889
  with open(jsonl_path, "r") as f: lines = f.readlines()
890
+
891
  lines = lines[1:] if len(lines) > 0 else []
892
+
893
  num_frames = len(lines)
894
+
895
  if num_frames <= 0: continue
896
 
897
+
898
+
899
  genmo_out = os.path.join(args.output, "genmo_features", f"{seq_name}.pt")
900
+
901
  smplx_out = os.path.join(args.output, "smplx_incam", f"{seq_name}_smplx.npz")
902
+
903
  smplx_global_out = os.path.join(args.output, "smplx_global", f"{seq_name}_global.npz")
904
+
905
  dpvo_dir = os.path.join(args.output, "dpvo", seq_name)
906
+
907
  for p in [genmo_out, smplx_out, smplx_global_out, dpvo_dir]:
908
+
909
  if p: os.makedirs(os.path.dirname(p), exist_ok=True)
910
 
911
+
912
+
913
  selected_set = set()
914
+
915
  if args.vitpose:
916
+
917
  if args.vitpose_use_all_frames: selected_indices = list(range(num_frames))
918
+
919
  else:
920
+
921
  selected_indices = _compute_vitpose_selected_indices(
922
+
923
  num_frames, FPS, args.vitpose_bucket_seconds,
924
+
925
  args.vitpose_frames_per_bucket, args.vitpose_sampling, args.vitpose_seed
926
+
927
  )
928
+
929
  selected_set = set(selected_indices)
930
 
931
+
932
+
933
  cap = cv2.VideoCapture(video_path)
934
+
935
  if not cap.isOpened(): continue
936
+
937
  W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
938
+
939
  H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
940
+
941
+
942
+
943
  resolved_ui_dir = args.ui_dir if args.ui_dir else _find_ui_dir()
944
+
945
  chat_font_path = _find_font_path(resolved_ui_dir)
946
+
947
  seq_seed = int(zlib.crc32(seq_name.encode("utf-8")) & 0xFFFFFFFF)
948
+
949
  chat_aug = SimpleChatOverlay(W, H, seed=seq_seed, num_lines=7, font_path=chat_font_path)
950
+
951
+ ui_aug = SimpleUIOverlay(W, H, seed=((seq_seed ^ 0xA5A5A5A5) & 0xFFFFFFFF), ui_dir=resolved_ui_dir,
952
+
953
  max_images=args.ui_max_images, show_prob=args.ui_show_prob)
954
 
955
+
956
+
957
  # --- BATCH SMPL (GPU) ---
958
+
959
  t0_smpl = time.perf_counter()
960
+
961
+
962
+
963
  smpl_precalc_data = []
964
+
965
  debug_global_verts_cpu = []
966
+
967
  parsed_rows = []
968
+
969
+
970
+
971
  for line in lines:
972
+
973
  row = json.loads(line)
974
+
975
  parsed_rows.append(parse_smpl_inputs_from_row(row, override_betas10, args.keep_unity_scale, args.transl_source, args.transl_y_offset_m))
976
+
977
+
978
+
979
  all_betas = np.stack([d['betas'] for d in parsed_rows])
980
+
981
  all_go = np.stack([d['global_orient'] for d in parsed_rows])
982
+
983
  all_bp = np.stack([d['body_pose'] for d in parsed_rows])
984
+
985
+
986
+
987
+ # We need an initial batch forward to get local pelvis offsets
988
+
989
  all_pelvis0 = batch_smpl_forward(all_betas, all_go, all_bp, device=device)
990
+
991
+
992
+
993
  C = np.diag([1.0, -1.0, 1.0]).astype(np.float64)
994
+
995
+ C4 = np.diag([1.0, -1.0, 1.0, 1.0]).astype(np.float64)
996
+
997
  all_go_w, all_pelvis_pos_w_cv = [], []
998
+
999
+
1000
+
1001
+ # --- FIX: DEFINE THE FIX ROTATION (Z-180) FOR WORLD ---
1002
+
1003
+ fix_rot = R.from_euler("z", 180, degrees=True).as_matrix()
1004
+
1005
+ fix_mat = np.eye(4, dtype=np.float64)
1006
+
1007
+ fix_mat[:3, :3] = fix_rot
1008
+
1009
+ # SMPLX-only adjustment: rotate the SMPLX global orientation in world by 180deg around Y.
1010
+ # (Do NOT touch camera/world transforms; this only changes the SMPL parameters.)
1011
+ # smplx_global_y180 = R.from_euler("y", 180, degrees=True).as_matrix()
1012
+
1013
+
1014
+
1015
  for i, d in enumerate(parsed_rows):
1016
+
1017
+ # Unity Rotation (Raw)
1018
+
1019
  R_cam_w_unity = R.from_quat(d['cam_rot_w_quat']).as_matrix()
1020
+
1021
+
1022
+
1023
+ # --- APPLY FIX HERE: Pre-multiply Camera Rot by Fix (Z-180) ---
1024
+
1025
+ # This ensures the SMPL global orientation is calculated relative to the FIXED Camera
1026
+
1027
+ R_cam_w_cv = fix_rot @ (C @ R_cam_w_unity @ C)
1028
+
1029
+
1030
+
1031
  R_pelvis_c_cv = R.from_rotvec(d['global_orient'].astype(np.float64)).as_matrix()
1032
+
1033
+ R_pelvis_w_cv = R_cam_w_cv @ R_pelvis_c_cv
1034
+
1035
  all_go_w.append(R.from_matrix(R_pelvis_w_cv).as_rotvec().astype(np.float32))
1036
+
1037
+
1038
+
1039
+ # Position Logic
1040
 
1041
  pelvis_pos_w_unity = d['pelvis_pos_world']
1042
+
1043
  root_pos_w_unity = (R_cam_w_unity @ d['root_cam_unity'] + d['cam_pos_world']).reshape(3)
1044
+
1045
  smpl_scale = d['smpl_scale']
1046
+
1047
  transl_source_local = str(args.transl_source).strip().lower()
1048
+
1049
  if transl_source_local == "root": target_pos_w_unity = root_pos_w_unity
1050
+
1051
  else:
1052
+
1053
  if bool(args.keep_unity_scale): target_pos_w_unity = pelvis_pos_w_unity
1054
+
1055
  else:
1056
+
1057
  if abs(smpl_scale) > 1e-8: target_pos_w_unity = root_pos_w_unity + (pelvis_pos_w_unity - root_pos_w_unity) / smpl_scale
1058
+
1059
  else: target_pos_w_unity = pelvis_pos_w_unity
1060
 
1061
+
1062
+
1063
+ # --- APPLY FIX HERE: Pre-multiply Position by Fix (Z-180) ---
1064
+
1065
+ pos_cv_raw = (C @ target_pos_w_unity).astype(np.float64)
1066
+
1067
+ pelvis_pos_w_cv = fix_rot @ pos_cv_raw.reshape(3, 1)
1068
+
1069
+ all_pelvis_pos_w_cv.append(pelvis_pos_w_cv.reshape(3))
1070
+
1071
+
1072
+
1073
+ all_go_w = np.stack(all_go_w)
1074
+
1075
+ # Apply the SMPLX-only world-space rotation to global_orient.
1076
+ # all_go_w = (
1077
+ # R.from_matrix(smplx_global_y180 @ R.from_rotvec(all_go_w.astype(np.float64)).as_matrix())
1078
+ # .as_rotvec()
1079
+ # .astype(np.float32)
1080
+ # )
1081
+
1082
+ # Compute World-Space Pelvis offsets (dependent on global orient)
1083
+
1084
+ all_pelvis0_w = batch_smpl_forward(all_betas, all_go_w, all_bp, device=device)
1085
+
1086
+
1087
+
1088
+ for i in range(num_frames):
1089
+
1090
+ d = parsed_rows[i]
1091
+
1092
+ # Incam Transl
1093
+
1094
+ transl_c = (d['target_cam_cv'] - all_pelvis0[i]).astype(np.float32)
1095
+
1096
+
1097
+
1098
+ # World Transl
1099
+
1100
+ if str(args.transl_source) == "root": transl_w = all_pelvis_pos_w_cv[i].astype(np.float32)
1101
1102
  else: transl_w = (all_pelvis_pos_w_cv[i] - all_pelvis0_w[i]).astype(np.float32)
1103
+
1104
+
1105
+
1106
  smpl_precalc_data.append({
1107
+
1108
  "go_c": d['global_orient'], "bp": d['body_pose'], "beta": d['betas'], "tr_c": transl_c,
1109
+
1110
+ "go_w": all_go_w[i], "tr_w": transl_w,
1111
+
1112
+ "cam_rot_w_quat": d["cam_rot_w_quat"], "cam_pos_world": d["cam_pos_world"]
1113
+
1114
  })
1115
+
1116
+
1117
+
1118
  prof["smpl_batch"] = time.perf_counter() - t0_smpl
1119
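
Why the Z-180 fix is applied to both the camera pose and the world-space targets: pre-multiplying T_wc and every world point by the same rigid transform leaves camera-space coordinates unchanged, so the incam parameters above are unaffected. A small sketch of that invariance:

    import numpy as np
    from scipy.spatial.transform import Rotation as R

    G = np.eye(4)
    G[:3, :3] = R.from_euler("z", 180, degrees=True).as_matrix()
    T_wc = np.eye(4); T_wc[:3, 3] = [1.0, 2.0, 3.0]       # illustrative camera pose
    p_w = np.array([0.5, 0.2, 4.0, 1.0])                  # homogeneous world point
    p_c_before = np.linalg.inv(T_wc) @ p_w
    p_c_after = np.linalg.inv(G @ T_wc) @ (G @ p_w)       # inv(G@T) @ (G@p) = inv(T) @ p
    assert np.allclose(p_c_before, p_c_after)
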
+
1120
+
1121
+
1122
+ # --- Debug Verification ---
1123
+
1124
+ if args.debug:
1125
+
1126
+ try:
1127
+
1128
+ row0 = json.loads(lines[0])
1129
+
1130
+ cam_pos0 = np.asarray(row0["cam_pos_world"], dtype=np.float64).reshape(3)
1131
+
1132
+ cam_q0 = np.asarray(row0["cam_rot_world"], dtype=np.float64).reshape(4)
1133
+
1134
+ pelvis_pos0 = np.asarray(row0["pelvis_pos_world"], dtype=np.float64).reshape(3)
1135
+
1136
+ pelvis_cam_meta0 = np.asarray(row0.get("smpl_incam_transl", [0.0, 0.0, 0.0]), dtype=np.float64).reshape(3)
1137
+
1138
+
1139
+
1140
+ R_cam_w0 = R.from_quat(cam_q0).as_matrix()
1141
+
1142
+ pelvis_cam_est0 = (R_cam_w0.T @ (pelvis_pos0 - cam_pos0).reshape(3, 1)).reshape(3)
1143
+
1144
+ diff0 = pelvis_cam_est0 - pelvis_cam_meta0
1145
+
1146
+
1147
+
1148
+ Log.info(f"[Debug] {seq_name} pelvis_cam_unity check: diff={diff0.round(4)}")
1149
+
1150
+ except Exception as e:
1151
+
1152
+ Log.warning(f"[Debug] {seq_name} pelvis_cam_unity check failed: {e}")
1153
+
1154
+
1155
+
1156
  t0_gap = time.perf_counter()
1157
+
1158
  smpl_renderer = None
1159
+
1160
  vid_incam, vid_global = None, None
1161
+
1162
  debug_end_frame = min(num_frames, DEBUG_NUM_FRAMES)
1163
+
1164
  if args.debug:
1165
+
1166
  os.makedirs(os.path.join(args.output, "debug_renders"), exist_ok=True)
1167
+
1168
  if debug_end_frame > 0:
1169
+
1170
  try:
1171
+
1172
  K4_init = np.asarray(json.loads(lines[0])["cam_intrinsics"], dtype=np.float32)
1173
+
1174
  smpl_renderer = SmplIncamRenderer(W, H, K4_init, device=device)
1175
+
1176
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
1177
+
1178
  vid_incam = cv2.VideoWriter(os.path.join(args.output, "debug_renders", f"{seq_name}_incam.mp4"), fourcc, FPS, (W, H))
1179
+
1180
+ dbg_gw, dbg_gh = 960, 540
1181
+
1182
+ vid_global = cv2.VideoWriter(os.path.join(args.output, "debug_renders", f"{seq_name}_global.mp4"), fourcc, FPS, (dbg_gw, dbg_gh))
1183
+
1184
  except: pass
1185
 
1186
+
1187
+
1188
  # --- MAIN LOOP ---
1189
+
1190
+ coco_subset, img_paths, K_fullimg_all = [], [], []
1191
+
1192
  cam_T_wc_cv_all, cam_T_w2c_cv_all = [], []
1193
+
1194
  dpvo_poses, dpvo_intrinsics = [], []
1195
+
1196
+ bboxes, bbx_xys_all, kp2d_all = [], [], []
1197
+
1198
  global_orient_c_all, transl_c_all, body_pose_all, betas_all = [], [], [], []
1199
+
1200
  global_orient_w_all, transl_w_all = [], []
 
1201
 
1202
+ vit_img_batch, all_vit_features = [], []
1203
+
1204
+
1205
 
1206
  ret, _ = cap.read() # skip 0
1207
+
1208
  prof["prep"] = time.perf_counter() - t0_gap
1209
 
1210
+
1211
+
1212
  t_start_loop = time.perf_counter()
1213
+
1214
  for idx in tqdm(range(num_frames), desc="Frames", leave=False):
1215
+
1216
  t0_read = time.perf_counter()
1217
+
1218
  ret, img_bgr = cap.read()
1219
+
1220
  prof["video_read"] += (time.perf_counter() - t0_read)
1221
+
1222
  if not ret: break
1223
+
1224
+
1225
+
1226
  img_filename = f"img_{idx:05d}.jpg"
1227
+
1228
  img_abs_path = os.path.join(out_img_folder, img_filename)
1229
+
1230
+
1231
+
1232
  t0_ov = time.perf_counter()
1233
+
1234
  chat_aug.maybe_append(idx)
1235
+
1236
  chat_aug.draw(img_bgr)
1237
+
1238
  ui_aug.draw(img_bgr)
1239
+
1240
  prof["overlay"] += (time.perf_counter() - t0_ov)
1241
 
1242
+
1243
+
1244
+ row = json.loads(lines[idx])
1245
+
1246
  K4 = np.asarray(row["cam_intrinsics"], dtype=np.float32)
1247
+
1248
  kpts_raw = np.asarray(row["kpts_2d"], dtype=np.float32).reshape(-1, 2)[:17]
1249
+
1250
  vis_raw = np.asarray(row["kpts_vis"], dtype=np.int32)[:17]
1251
+
1252
+ if vis_raw.shape[0] >= 5: vis_raw[3] = 1; vis_raw[4] = 1
1253
+
1254
  bbox = clamp_bbox_xywh_to_image(row["bbox"], W, H)
1255
+
1256
+
1257
+
1258
  sd = smpl_precalc_data[idx]
1259
+
1260
  global_orient_c_all.append(sd['go_c'])
1261
+
1262
  transl_c_all.append(sd['tr_c'])
1263
+
1264
  global_orient_w_all.append(sd['go_w'])
1265
+
1266
  transl_w_all.append(sd['tr_w'])
1267
+
1268
  body_pose_all.append(sd['bp'])
1269
+
1270
  betas_all.append(sd['beta'])
1271
+
1272
+
1273
+
1274
  bboxes.append(np.asarray(bbox, dtype=np.float32))
1275
+
1276
  bbx_xys_all.append(bbox_xywh_to_bbx_xys(bbox))
1277
+
1278
  kp2d_all.append(np.concatenate([kpts_raw, (vis_raw > 0).astype(np.float32)[:, None]], axis=1))
1279
+
1280
  K_fullimg_all.append(k4_to_K3(K4))
1281
 
1282
+
1283
+
1284
  img_rel = os.path.join("images", seq_name, img_filename).replace("\\", "/")
1285
+
1286
  img_paths.append(img_rel)
1287
+
1288
+
1289
+
1290
+ # Use raw Unity values
1291
+
1292
+ p_w = np.asarray(sd["cam_pos_world"], dtype=np.float32)
1293
+
1294
+ q_w = np.asarray(sd["cam_rot_w_quat"], dtype=np.float32)
1295
+
1296
+
1297
+
1298
+ # 1. Build the Standard Unity-to-CV Matrix (C @ M @ C)
1299
+
1300
  cam_T_wc = build_T_wc(p_w, q_w)
1301
+
1302
+ cam_T_wc_cv_raw = (C4 @ cam_T_wc @ C4)
1303
+
1304
+
1305
+
1306
+ # 2. APPLY THE FIX (Z-180) to the Camera, matching precalc loop
1307
+
1308
+ cam_T_wc_cv = (fix_mat @ cam_T_wc_cv_raw).astype(np.float32)
1309
+
1310
+
1311
+
1312
+ # 3. Invert for W2C
1313
+
1314
+ cam_T_w2c_cv = np.linalg.inv(cam_T_wc_cv)
1315
+
1316
+
1317
+
1318
  cam_T_wc_cv_all.append(cam_T_wc_cv)
1319
+
1320
  cam_T_w2c_cv_all.append(cam_T_w2c_cv)
1321
+
1322
  dpvo_poses.append(f"{p_w[0]} {p_w[1]} {p_w[2]} {q_w[0]} {q_w[1]} {q_w[2]} {q_w[3]}")
1323
+
1324
  dpvo_intrinsics.append(K4.astype(np.float32))
1325
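
For the rigid cam_T_wc_cv above, np.linalg.inv agrees with the closed form [R | t]^-1 = [R^T | -R^T t]; a sketch of the equivalence:

    import numpy as np

    def invert_rigid(T):
        Ti = np.eye(4, dtype=T.dtype)
        Ti[:3, :3] = T[:3, :3].T
        Ti[:3, 3] = -T[:3, :3].T @ T[:3, 3]
        return Ti

    Rz = np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]], dtype=np.float32)  # 90 deg about z
    T = np.eye(4, dtype=np.float32); T[:3, :3] = Rz; T[:3, 3] = [1, 2, 3]
    assert np.allclose(invert_rigid(T), np.linalg.inv(T), atol=1e-6)
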
 
1326
+
1327
+
1328
  if args.genmo and vit_model is not None:
1329
+
1330
  t0_vit = time.perf_counter()
1331
+
1332
+ img_tensor = _process_image_memory(img_bgr, bbox, img_size=256)
1333
+
1334
  vit_img_batch.append(img_tensor)
1335
 
1336
  if len(vit_img_batch) >= args.vit_batch_size:
1337
+
1338
  batch_np = np.stack(vit_img_batch)
1339
+
1340
  batch_t = torch.from_numpy(batch_np).to(device, non_blocking=True)
1341
+
1342
  with torch.inference_mode():
1343
+
1344
  with torch.amp.autocast("cuda"):
1345
+
1346
  feats = vit_model({"img": batch_t})
1347
+
1348
  all_vit_features.append(feats.detach().cpu())
1349
+
1350
  vit_img_batch = []
1351
+
1352
  prof["vit_process"] += (time.perf_counter() - t0_vit)
1353
 
1354
+
1355
+
1356
  if args.vitpose and (idx in selected_set):
1357
+
1358
  t0_wr = time.perf_counter()
1359
+
1360
  cv2.imwrite(img_abs_path, img_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
1361
+
1362
  kpts_coco = []
1363
+
1364
  for k in range(17): kpts_coco.extend([float(kpts_raw[k, 0]), float(kpts_raw[k, 1]), int(vis_raw[k])])
1365
+
1366
  coco_subset.append(({"file_name": img_rel, "width": W, "height": H},
1367
+
1368
  {"category_id": 1, "bbox": bbox, "area": float(bbox[2]*bbox[3]), "iscrowd": 0, "keypoints": kpts_coco, "num_keypoints": int(np.sum(vis_raw > 0))}))
1369
+
1370
  prof["sparse_write"] += (time.perf_counter() - t0_wr)
1371
 
1372
+
1373
+
1374
  if args.debug and idx < debug_end_frame and smpl_renderer:
1375
+
1376
  t0_dbg = time.perf_counter()
1377
+
1378
  dbg = img_bgr.copy()
1379
+
1380
  try: draw_bbox_xywh_and_center(dbg, bbox)
1381
+
1382
  except: pass
1383
+
1384
  try:
1385
+
1386
  rgb = smpl_renderer.render(dbg[:, :, ::-1].copy(), sd['go_c'], sd['bp'], sd['beta'], sd['tr_c'], K4[:2], K4[2:])
1387
+
1388
  dbg = rgb[:, :, ::-1].copy()
1389
+
1390
  except: pass
1391
+
1392
  if not args.debug_no_coco:
1393
+
1394
  draw_vis_text_and_points(dbg, kpts_raw, vis_raw)
1395
+
1396
  if vid_incam: vid_incam.write(dbg)
1397
+
1398
+
1399
+
1400
  if vid_global:
1401
+
1402
  verts_w = smpl_renderer.get_verts(sd['go_w'], sd['bp'], sd['beta'], sd['tr_w']).float()
1403
+
1404
  debug_global_verts_cpu.append(verts_w.detach().cpu())
1405
+
1406
  prof["debug_rend"] += (time.perf_counter() - t0_dbg)
1407
 
1408
+
1409
+
1410
  if args.genmo and len(vit_img_batch) > 0 and vit_model is not None:
1411
+
1412
  t0_vit = time.perf_counter()
1413
+
1414
  batch_np = np.stack(vit_img_batch)
1415
+
1416
  batch_t = torch.from_numpy(batch_np).to(device, non_blocking=True)
1417
+
1418
  with torch.inference_mode():
1419
+
1420
  with torch.amp.autocast("cuda"):
1421
+
1422
  feats = vit_model({"img": batch_t})
1423
+
1424
  all_vit_features.append(feats.detach().cpu())
1425
+
1426
  prof["vit_process"] += (time.perf_counter() - t0_vit)
1427
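
The block above flushes whatever is left in vit_img_batch so that f_imgseq ends up with exactly one feature row per frame. The generic accumulate-and-flush pattern it follows, sketched with a hypothetical encode function:

    def batched(items, batch_size, encode):
        out, buf = [], []
        for it in items:
            buf.append(it)
            if len(buf) >= batch_size:
                out.append(encode(buf))
                buf = []
        if buf:                        # final partial batch
            out.append(encode(buf))
        return out
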
 
1428
+
1429
+
1430
  prof["loop_total"] = time.perf_counter() - t_start_loop
1431
+
1432
  cap.release()
1433
+
1434
  if vid_incam: vid_incam.release()
1435
+
1436
+
1437
+
1438
  t0_dbg = time.perf_counter()
1439
+
1440
  if vid_global and len(debug_global_verts_cpu) > 0:
1441
+
1442
  try:
1443
+
1444
+ from hmr4d.utils.vis.renderer import (
1445
+
1446
+ Renderer,
1447
+
1448
+ get_global_cameras_static,
1449
+
1450
+ get_ground_params_from_points,
1451
+
1452
+ perspective_projection,
1453
+
1454
+ )
1455
+
1456
  from hmr4d.utils.geo.hmr_cam import create_camera_sensor
1457
+
1458
+
1459
+
1460
+ dbg_gw, dbg_gh = 960, 540
1461
+
1462
+ _, _, K_global = create_camera_sensor(dbg_gw, dbg_gh, 24)
1463
+
1464
+ global_renderer = Renderer(dbg_gw, dbg_gh, device=device, faces=smpl_renderer.faces, K=K_global.to(device), bin_size=0)
1465
+
1466
  verts_seq = torch.stack(debug_global_verts_cpu, dim=0)
1467
+
1468
  off = verts_seq[0].mean(0); off[1] = verts_seq[0, :, 1].min()
1469
+
1470
  verts_seq = verts_seq - off
1471
+
1472
+
1473
+
1474
+ # Convert CV-cam to GPU tensor for visualizer
1475
+
1476
+ cam_centers = None
1477
+
1478
+ try:
1479
+
1480
+ F = int(verts_seq.shape[0])
1481
+
1482
+ if len(cam_T_wc_cv_all) >= F:
1483
+
1484
+ cam_wc = np.stack(cam_T_wc_cv_all[:F], axis=0).astype(np.float32)
1485
+
1486
+ cam_centers = torch.from_numpy(cam_wc[:, :3, 3]).to(device=device)
1487
+
1488
+ cam_centers = cam_centers - off.to(device=device)[None]
1489
+
1490
+ except Exception:
1491
+
1492
+ cam_centers = None
1493
+
1494
+
1495
+
1496
+ g_R, g_T, g_L = get_global_cameras_static(
1497
+
1498
+ verts_seq, beta=2.0, cam_height_degree=20, target_center_height=1.0, device=device
1499
+
1500
+ )
1501
+
1502
+
1503
+
1504
  if global_J_reg is not None and verts_seq.shape[1] == global_J_reg.shape[-1]:
1505
+
1506
+ joints_seq = torch.einsum("jv,fvk->fjk", global_J_reg.cpu(), verts_seq)
1507
+
1508
+ roots = joints_seq[:, 0]
1509
+
1510
+ else:
1511
+
1512
+ roots = verts_seq.mean(1)
1513
+
1514
  sc, cx, cz = get_ground_params_from_points(roots, verts_seq)
1515
+
1516
  global_renderer.set_ground(sc * 1.5, cx, cz)
1517
+
1518
  col = torch.tensor([[0.0, 1.0, 0.0]], device=device)
1519
+
1520
+ trail = []
1521
+
1522
+
1523
+
1524
+ def _project_xy(points_w: torch.Tensor):
1525
+
1526
+ P = points_w.view(1, -1, 3)
1527
+
1528
+ x2d = perspective_projection(P, global_renderer.K, global_renderer.R, global_renderer.T.reshape(1, 3, 1))[0]
1529
+
1530
+ return x2d
1531
+
1532
+
1533
+
1534
+ def _draw_polyline(img_bgr, pts_xy, color, closed=False, thickness=1):
1535
+
1536
+ pts = np.asarray(pts_xy, dtype=np.int32).reshape(-1, 1, 2)
1537
+
1538
+ if len(pts) < 2: return
1539
+
1540
+ cv2.polylines(img_bgr, [pts], bool(closed), color, int(thickness), cv2.LINE_AA)
1541
+
1542
+
1543
+
1544
+ def _draw_camera_box_axes(img_bgr, C_w, right, up, fwd, scale=0.25):
1545
+
1546
+ C_w = C_w.reshape(3)
1547
+
1548
+ right = right.reshape(3)
1549
+
1550
+ up = up.reshape(3)
1551
+
1552
+ fwd = fwd.reshape(3)
1553
+
1554
+ L = float(scale)
1555
+
1556
+
1557
+
1558
+ # Draw Axis instead of just box (RGB = XYZ)
1559
+
1560
+ # X (Right) - Red
1561
+
1562
+ p_x = C_w + L * right
1563
+
1564
+ xy_x = _project_xy(torch.stack([C_w, p_x])).detach().cpu().numpy()
1565
+
1566
+ _draw_polyline(img_bgr, xy_x, (0, 0, 255), thickness=2)
1567
+
1568
+
1569
+
1570
+ # Y (Up/Down) - Green
1571
+
1572
+ p_y = C_w + L * up
1573
+
1574
+ xy_y = _project_xy(torch.stack([C_w, p_y])).detach().cpu().numpy()
1575
+
1576
+ _draw_polyline(img_bgr, xy_y, (0, 255, 0), thickness=2)
1577
+
1578
+
1579
+
1580
+ # Z (Fwd) - Blue
1581
+
1582
+ p_z = C_w + L * fwd
1583
+
1584
+ xy_z = _project_xy(torch.stack([C_w, p_z])).detach().cpu().numpy()
1585
+
1586
+ _draw_polyline(img_bgr, xy_z, (255, 0, 0), thickness=2)
1587
+
1588
+
1589
+
1590
  for i in range(len(verts_seq)):
1591
+
1592
  cam = global_renderer.create_camera(g_R[i], g_T[i])
1593
+
1594
  img = global_renderer.render_with_ground(verts_seq[i].to(device)[None], col, cam, g_L)
1595
+
1596
+ img_bgr = img[:, :, ::-1].copy()
1597
+
1598
+
1599
+
1600
+ if cam_centers is not None and i < cam_centers.shape[0]:
1601
+
1602
+ try:
1603
+
1604
+ # Blue ray: camera center -> SMPL root
1605
+
1606
+ if i < roots.shape[0]:
1607
+
1608
+ pts_line = torch.stack([cam_centers[i], roots[i].to(device=device)], dim=0)
1609
+
1610
+ xy_line = _project_xy(pts_line).detach().cpu().numpy()
1611
+
1612
+ _draw_polyline(img_bgr, xy_line, (255, 200, 50), closed=False, thickness=1)
1613
+
1614
+
1615
+
1616
+ P = cam_centers[i].view(1, 3)
1617
+
1618
+ x2d = _project_xy(P)[0]
1619
+
1620
+ x, y = int(round(float(x2d[0].item()))), int(round(float(x2d[1].item())))
1621
+
1622
+ if 0 <= x < img_bgr.shape[1] and 0 <= y < img_bgr.shape[0]:
1623
+
1624
+ trail.append((x, y))
1625
+
1626
+ cv2.circle(img_bgr, (x, y), 3, (0, 0, 255), -1)
1627
+
1628
+ if len(trail) >= 2:
1629
+
1630
+ cv2.polylines(img_bgr, [np.array(trail, dtype=np.int32)], False, (0, 0, 255), 1)
1631
+
1632
+
1633
+
1634
+ if len(cam_T_wc_cv_all) > i:
1635
+
1636
+ R_c2w = torch.from_numpy(np.asarray(cam_T_wc_cv_all[i], dtype=np.float32)[:3, :3]).to(device=device)
1637
+
1638
+ C_w = cam_centers[i]
1639
+
1640
+ right = R_c2w[:, 0]
1641
+
1642
+ up = R_c2w[:, 1]
1643
+
1644
+ fwd = R_c2w[:, 2]
1645
+
1646
+ _draw_camera_box_axes(img_bgr, C_w, right, up, fwd, scale=0.35)
1647
+
1648
+ except Exception: pass
1649
+
1650
+
1651
+
1652
+ vid_global.write(img_bgr)
1653
+
1654
  except: pass
1655
+
1656
  vid_global.release()
1657
+
1658
  prof["debug_rend"] += (time.perf_counter() - t0_dbg)
1659
+
1660
+
1661
+
1662
  t0_save = time.perf_counter()
1663
+
1664
  if args.genmo:
1665
+
1666
  trans_w = np.stack(transl_w_all).astype(np.float32)
1667
+
1668
  world_off = trans_w[0].copy(); world_off[1] -= float(args.world_y_offset_m)
1669
+
1670
  trans_w_centered = trans_w - world_off[None]
1671
+
1672
  mats_w2c = np.stack(cam_T_w2c_cv_all).astype(np.float32)
1673
+
1674
  mats_wc = np.stack(cam_T_wc_cv_all).astype(np.float32)
1675
+
1676
  T_wp_w = np.eye(4, dtype=np.float32); T_wp_w[:3, 3] = world_off
1677
+
1678
  T_w_wp = np.eye(4, dtype=np.float32); T_w_wp[:3, 3] = -world_off
1679
+
1680
  mats_w2c_c = np.matmul(mats_w2c, T_wp_w[None])
1681
+
1682
  mats_wc_c = np.matmul(T_w_wp[None], mats_wc)
1683
+
1684
  cam_av, cam_tv = compute_velocity(mats_wc_c, fps=FPS)
1685
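
The centering above re-expresses everything in a world frame translated to world_off: points shift by -world_off (T_w_wp) while the world-to-camera matrices absorb the opposite shift (T_wp_w), so (T_w2c @ T_wp_w) applied to a shifted point equals T_w2c applied to the original. A quick consistency check:

    import numpy as np

    world_off = np.array([0.3, 1.1, -2.0], dtype=np.float32)   # illustrative offset
    T_wp_w = np.eye(4, dtype=np.float32); T_wp_w[:3, 3] = world_off
    T_w_wp = np.eye(4, dtype=np.float32); T_w_wp[:3, 3] = -world_off
    assert np.allclose(T_wp_w @ T_w_wp, np.eye(4))             # the shifts cancel
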
+
1686
+
1687
+
1688
  f_imgseq = torch.cat(all_vit_features, dim=0).float() if all_vit_features else torch.empty(0)
1689
 
1690
+
1691
+
1692
  g_dict = {
1693
+
1694
  "smpl_params_c": {"global_orient": torch.from_numpy(np.stack(global_orient_c_all)), "body_pose": torch.from_numpy(np.stack(body_pose_all)), "transl": torch.from_numpy(np.stack(transl_c_all)), "betas": torch.from_numpy(np.stack(betas_all))},
1695
+
1696
  "smpl_params_w": {"global_orient": torch.from_numpy(np.stack(global_orient_w_all)), "body_pose": torch.from_numpy(np.stack(body_pose_all)), "transl": torch.from_numpy(trans_w_centered), "betas": torch.from_numpy(np.stack(betas_all))},
1697
+
1698
  "T_w2c": torch.from_numpy(mats_w2c_c), "K_fullimg": torch.from_numpy(np.stack(K_fullimg_all)),
1699
+
1700
  "kp2d": torch.from_numpy(np.stack(kp2d_all)), "bbx_xys": torch.from_numpy(np.stack(bbx_xys_all)),
1701
+
1702
  "cam_angvel": torch.from_numpy(cam_av), "cam_tvel": torch.from_numpy(cam_tv),
1703
+
1704
  "imgname": img_paths, "valid_mask": torch.ones(len(img_paths), dtype=torch.float32),
1705
+
1706
  "world_offset": torch.from_numpy(world_off.astype(np.float32)),
1707
+
1708
+ "f_imgseq": f_imgseq
1709
+
1710
  }
1711
+
1712
  torch.save(g_dict, genmo_out)
1713
 
1714
+
1715
+
1716
  if args.smplx:
1717
+
1718
  poses66 = np.concatenate([np.stack(global_orient_w_all), np.stack(body_pose_all)], axis=1)
1719
+
1720
  poses165 = np.pad(poses66, ((0,0),(0,99)), mode="constant").astype(np.float32)
1721
+
1722
  trans_w = np.stack(transl_w_all).astype(np.float32)
1723
+
1724
  world_off = trans_w[0].copy(); world_off[1] -= float(args.world_y_offset_m)
1725
+
1726
  trans_w = trans_w - world_off[None]
1727
+
1728
  np.savez(smplx_global_out, mocap_framerate=int(FPS), gender="neutral", betas=betas_all[0], trans=trans_w, poses=poses165, world_offset=world_off)
1729
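
Layout of the 165-D pose written above, as I understand the AMASS/SMPL-X convention: 3 values of global_orient plus 63 for the 21 body joints are filled, and the remaining 99 (jaw, eyes, hands) are zero-padded:

    import numpy as np

    poses66 = np.zeros((10, 66), dtype=np.float32)   # illustrative batch of 10 frames
    poses165 = np.pad(poses66, ((0, 0), (0, 99)), mode="constant")
    assert poses165.shape == (10, 165)               # 3 + 63 + 99 = 165
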
 
1730
+
1731
+
1732
  if args.vitpose and coco_subset:
1733
+
1734
  with open(os.path.join(temp_ann_dir, f"{seq_name}.json"), "w") as f: json.dump(coco_subset, f)
1735
+
1736
+
1737
+
1738
  prof["save_files"] = time.perf_counter() - t0_save
1739
+
1740
  total_t = time.perf_counter() - t_start_seq
1741
+
1742
+
1743
+
1744
  print(f" > Done in {total_t:.2f}s | FPS: {num_frames/total_t:.1f}")
1745
+
1746
  print(f" [Breakdown] BatchPrep: {prof['smpl_batch']:.2f}s | Init/Gap: {prof['prep']:.2f}s | Read: {prof['video_read']:.2f}s")
1747
+
1748
  print(f" Overlay: {prof['overlay']:.2f}s | SparseWrite: {prof['sparse_write']:.2f}s | ViT: {prof['vit_process']:.2f}s")
1749
+
1750
  print(f" DbgRend: {prof['debug_rend']:.2f}s | SaveFiles: {prof['save_files']:.2f}s")
1751
 
1752
+
1753
+
1754
  print("All sequences processed.")
1755
 
1756
+
1757
+
1758
  if __name__ == "__main__":
1759
+
1760
+ main()
train.log CHANGED
@@ -3197,3 +3197,635 @@ full_key: dataset_opts.train.unity
3197
  [12/30 22:16:05][INFO] [UnityDataset] Found 5 sequences.
3198
  [12/30 22:16:05][INFO] [Val Dataset][7/7]: name=unity_val, size=5, genmo.datasets.unity_dataset.UnityDataset
3199
  [12/30 22:16:05][INFO]
3200
+ [12/30 22:26:06][INFO] [Exp Name]: finetune_
3201
+ [12/30 22:26:06][INFO] [GPU x Batch] = 1 x 1
3202
+ [12/30 22:26:06][INFO] [UnityDataset] Found 5 sequences.
3203
+ [12/30 22:26:06][INFO] [Train Dataset][9/9]: name=unity, size=5, genmo.datasets.unity_dataset.UnityDataset
3204
+ [12/30 22:26:06][INFO] [Train Dataset][All]: ConcatDataset size=5
3205
+ [12/30 22:26:06][INFO]
3206
+ [12/30 22:26:06][INFO] [UnityDataset] Found 5 sequences.
3207
+ [12/30 22:26:06][INFO] [Val Dataset][7/7]: name=unity_val, size=5, genmo.datasets.unity_dataset.UnityDataset
3208
+ [12/30 22:26:06][INFO]
3209
+ [12/30 22:26:11][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
3210
+ [12/30 22:26:42][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_3/checkpoints'
3211
+ [12/30 22:26:54][INFO] Start Fitting...
3212
+ [12/30 22:26:56][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
3213
+
3214
+ [12/30 22:26:56][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
3215
+
3216
+ [12/30 22:26:56][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
3217
+
3218
+ [12/30 22:26:56][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
3219
+ [12/30 22:27:28][INFO] [Exp Name]: finetune_
3220
+ [12/30 22:27:28][INFO] [GPU x Batch] = 1 x 1
3221
+ [12/30 22:27:28][INFO] [UnityDataset] Found 5 sequences.
3222
+ [12/30 22:27:28][INFO] [Train Dataset][9/9]: name=unity, size=5, genmo.datasets.unity_dataset.UnityDataset
3223
+ [12/30 22:27:28][INFO] [Train Dataset][All]: ConcatDataset size=5
3224
+ [12/30 22:27:28][INFO]
3225
+ [12/30 22:27:28][INFO] [UnityDataset] Found 5 sequences.
3226
+ [12/30 22:27:28][INFO] [Val Dataset][7/7]: name=unity_val, size=5, genmo.datasets.unity_dataset.UnityDataset
3227
+ [12/30 22:27:28][INFO]
3228
+ [12/30 22:27:37][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
3229
+ [12/30 22:27:56][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_4/checkpoints'
3230
+ [12/30 22:28:08][INFO] Start Fitting...
3231
+ [12/30 22:28:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
3232
+
3233
+ [12/30 22:28:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
3234
+
3235
+ [12/30 22:28:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
3236
+
3237
+ [12/30 22:28:11][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
3238
+ [12/30 22:29:56][INFO] [Exp Name]: finetune_
3239
+ [12/30 22:29:56][INFO] [GPU x Batch] = 1 x 1
3240
+ [12/30 22:29:56][INFO] [UnityDataset] Found 2 sequences.
3241
+ [12/30 22:29:56][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
3242
+ [12/30 22:29:56][INFO] [Train Dataset][All]: ConcatDataset size=2
3243
+ [12/30 22:29:56][INFO]
3244
+ [12/30 22:29:56][INFO] [UnityDataset] Found 2 sequences.
3245
+ [12/30 22:29:56][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
3246
+ [12/30 22:29:56][INFO]
3247
+ [12/30 22:30:02][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
3248
+ [12/30 22:30:17][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_5/checkpoints'
3249
+ [12/30 22:30:30][INFO] Start Fitting...
3250
+ [12/30 22:30:31][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
3251
+
3252
+ [12/30 22:30:31][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
3253
+
3254
+ [12/30 22:30:31][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
3255
+
3256
+ [12/30 22:30:31][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
3257
+ [12/30 22:56:38][INFO] [Exp Name]: finetune_
3258
+ [12/30 22:56:38][INFO] [GPU x Batch] = 1 x 1
3259
+ [12/30 22:56:38][INFO] [UnityDataset] Found 6 sequences.
3260
+ [12/30 22:56:38][INFO] [Train Dataset][9/9]: name=unity, size=6, genmo.datasets.unity_dataset.UnityDataset
3261
+ [12/30 22:56:38][INFO] [Train Dataset][All]: ConcatDataset size=6
3262
+ [12/30 22:56:38][INFO]
3263
+ [12/30 22:56:38][INFO] [UnityDataset] Found 6 sequences.
3264
+ [12/30 22:56:38][INFO] [Val Dataset][7/7]: name=unity_val, size=6, genmo.datasets.unity_dataset.UnityDataset
3265
+ [12/30 22:56:38][INFO]
3266
+ [12/30 22:56:44][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/30 22:57:07][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_6/checkpoints'
+ [12/30 22:57:27][INFO] Start Fitting...
+ [12/30 22:57:30][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/30 22:57:30][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 22:57:31][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 22:57:31][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/30 22:57:34][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/30 22:57:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/30 22:57:40][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/30 22:57:47][WARNING] [VisUnityVal] Failed to read image: third_party/GVHMR/processed_dataset/images/0_biboo_birthday_speech/img_00699.jpg
+ [12/30 22:58:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 22:58:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 22:58:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 22:58:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 22:58:14][INFO] βœ…[FIT][Epoch 0] finished! 00:46β†’03:06 | loss_epoch=28
+ [12/30 23:01:18][INFO] [Exp Name]: finetune_
+ [12/30 23:01:18][INFO] [GPU x Batch] = 1 x 1
+ [12/30 23:01:18][INFO] [UnityDataset] Found 6 sequences.
+ [12/30 23:01:18][INFO] [Train Dataset][9/9]: name=unity, size=6, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:01:18][INFO] [Train Dataset][All]: ConcatDataset size=6
+ [12/30 23:01:18][INFO]
+ [12/30 23:01:18][INFO] [UnityDataset] Found 6 sequences.
+ [12/30 23:01:18][INFO] [Val Dataset][7/7]: name=unity_val, size=6, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:01:18][INFO]
+ [12/30 23:01:26][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/30 23:01:45][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_0/checkpoints'
+ [12/30 23:01:57][INFO] Start Fitting...
+ [12/30 23:01:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/30 23:01:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:01:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:01:59][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/30 23:02:01][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/30 23:02:03][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/30 23:02:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/30 23:02:13][WARNING] [VisUnityVal] Failed to read image: third_party/GVHMR/processed_dataset/images/0_biboo_birthday_speech/img_00699.jpg
+ [12/30 23:02:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:02:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:02:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:02:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:02:41][INFO] βœ…[FIT][Epoch 0] finished! 00:43β†’02:53 | loss_epoch=28
+ [12/30 23:09:40][INFO] [Exp Name]: finetune_
+ [12/30 23:09:40][INFO] [GPU x Batch] = 1 x 1
+ [12/30 23:09:41][INFO] [UnityDataset] Found 6 sequences.
+ [12/30 23:09:41][INFO] [Train Dataset][9/9]: name=unity, size=6, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:09:41][INFO] [Train Dataset][All]: ConcatDataset size=6
+ [12/30 23:09:41][INFO]
+ [12/30 23:09:41][INFO] [UnityDataset] Found 6 sequences.
+ [12/30 23:09:41][INFO] [Val Dataset][7/7]: name=unity_val, size=6, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:09:41][INFO]
+ [12/30 23:09:49][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/30 23:10:08][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_1/checkpoints'
+ [12/30 23:10:17][INFO] Start Fitting...
+ [12/30 23:10:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/30 23:10:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:10:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:10:18][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/30 23:10:19][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/30 23:10:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/30 23:10:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/30 23:10:44][INFO] [Exp Name]: finetune_
+ [12/30 23:10:44][INFO] [GPU x Batch] = 1 x 1
+ [12/30 23:10:44][INFO] [UnityDataset] Found 6 sequences.
+ [12/30 23:10:44][INFO] [Train Dataset][9/9]: name=unity, size=6, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:10:44][INFO] [Train Dataset][All]: ConcatDataset size=6
+ [12/30 23:10:44][INFO]
+ [12/30 23:10:44][INFO] [UnityDataset] Found 6 sequences.
+ [12/30 23:10:44][INFO] [Val Dataset][7/7]: name=unity_val, size=6, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:10:44][INFO]
+ [12/30 23:10:52][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/30 23:11:04][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_2/checkpoints'
+ [12/30 23:11:11][INFO] Start Fitting...
+ [12/30 23:11:13][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/30 23:11:13][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:11:13][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:11:13][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/30 23:11:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/30 23:11:15][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/30 23:11:19][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/30 23:11:27][WARNING] [VisUnityVal] Failed to read image: third_party/GVHMR/processed_dataset/images/0_biboo_birthday_speech/img_00699.jpg
+ [12/30 23:11:53][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:11:53][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:11:53][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:11:53][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:11:53][INFO] βœ…[FIT][Epoch 0] finished! 00:40β†’02:43 | loss_epoch=28
+ [12/30 23:29:33][INFO] [Exp Name]: finetune_
+ [12/30 23:29:33][INFO] [GPU x Batch] = 1 x 1
+ [12/30 23:29:33][INFO] [UnityDataset] Found 2 sequences.
+ [12/30 23:29:33][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:29:33][INFO] [Train Dataset][All]: ConcatDataset size=2
+ [12/30 23:29:33][INFO]
+ [12/30 23:29:33][INFO] [UnityDataset] Found 2 sequences.
+ [12/30 23:29:33][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:29:33][INFO]
+ [12/30 23:29:39][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/30 23:30:02][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_3/checkpoints'
+ [12/30 23:30:13][INFO] Start Fitting...
+ [12/30 23:30:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/30 23:30:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:30:14][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:30:14][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/30 23:30:15][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/30 23:30:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/30 23:30:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/30 23:30:30][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9930 pred=+0.9643 delta(pred-gt)=-0.0287
+ [12/30 23:30:30][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03876931 -0.17480041 0.02509396] global_orient0_aa(pred)=[-0.1090048 -1.7763788 -0.15125035]
+ [12/30 23:30:30][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-9.99,+2.34,+1.24) pred=(-101.99,-9.31,-0.53) pred_vs_gt=(-91.73,-11.16,-3.80)
+ [12/30 23:30:30][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+94.23
+ [12/30 23:46:28][INFO] [Exp Name]: finetune_
+ [12/30 23:46:28][INFO] [GPU x Batch] = 1 x 1
+ [12/30 23:46:28][INFO] [UnityDataset] Found 2 sequences.
+ [12/30 23:46:28][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:46:28][INFO] [Train Dataset][All]: ConcatDataset size=2
+ [12/30 23:46:28][INFO]
+ [12/30 23:46:28][INFO] [UnityDataset] Found 2 sequences.
+ [12/30 23:46:28][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:46:28][INFO]
+ [12/30 23:46:34][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/30 23:46:54][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_4/checkpoints'
+ [12/30 23:47:05][INFO] Start Fitting...
+ [12/30 23:47:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/30 23:47:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:47:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:47:07][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/30 23:47:09][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/30 23:47:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/30 23:47:12][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/30 23:47:26][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9930 pred=+0.9643 delta(pred-gt)=-0.0287
+ [12/30 23:47:26][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03876931 -0.17480041 0.02509396] global_orient0_aa(pred)=[-0.1090048 -1.7763788 -0.15125035]
+ [12/30 23:47:26][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-9.99,+2.34,+1.24) pred=(-101.99,-9.31,-0.53) pred_vs_gt=(-91.73,-11.16,-3.80)
+ [12/30 23:47:26][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+94.23
+ [12/30 23:51:42][INFO] [Exp Name]: finetune_
+ [12/30 23:51:42][INFO] [GPU x Batch] = 1 x 1
+ [12/30 23:51:42][INFO] [UnityDataset] Found 2 sequences.
+ [12/30 23:51:42][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:51:42][INFO] [Train Dataset][All]: ConcatDataset size=2
+ [12/30 23:51:42][INFO]
+ [12/30 23:51:42][INFO] [UnityDataset] Found 2 sequences.
+ [12/30 23:51:42][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:51:42][INFO]
+ [12/30 23:51:48][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/30 23:52:04][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_5/checkpoints'
+ [12/30 23:52:15][INFO] Start Fitting...
+ [12/30 23:52:16][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/30 23:52:16][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:52:16][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:52:16][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/30 23:52:18][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/30 23:52:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/30 23:52:21][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/30 23:52:35][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9930 pred=+0.9643 delta(pred-gt)=-0.0287
+ [12/30 23:52:35][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03876931 -0.17480041 0.02509396] global_orient0_aa(pred)=[-0.1090048 -1.7763788 -0.15125035]
+ [12/30 23:52:35][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-9.99,+2.34,+1.24) pred=(-101.99,-9.31,-0.53) pred_vs_gt=(-91.73,-11.16,-3.80)
+ [12/30 23:52:35][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+94.23
+ [12/30 23:53:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:53:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:53:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:53:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:53:24][INFO] βœ…[FIT][Epoch 0] finished! 01:08β†’04:34 | loss_epoch=24.5
+ [12/30 23:55:59][INFO] [Exp Name]: finetune_
+ [12/30 23:55:59][INFO] [GPU x Batch] = 1 x 1
+ [12/30 23:55:59][INFO] [UnityDataset] Found 2 sequences.
+ [12/30 23:55:59][INFO] [Train Dataset][9/9]: name=unity, size=2, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:55:59][INFO] [Train Dataset][All]: ConcatDataset size=2
+ [12/30 23:55:59][INFO]
+ [12/30 23:55:59][INFO] [UnityDataset] Found 2 sequences.
+ [12/30 23:55:59][INFO] [Val Dataset][7/7]: name=unity_val, size=2, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:55:59][INFO]
+ [12/30 23:56:06][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/30 23:56:23][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_6/checkpoints'
+ [12/30 23:56:35][INFO] Start Fitting...
+ [12/30 23:56:37][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/30 23:56:37][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:56:37][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:56:37][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/30 23:56:39][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/30 23:56:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/30 23:56:42][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/30 23:56:54][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9930 pred=+0.9643 delta(pred-gt)=-0.0287
+ [12/30 23:56:54][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03876931 -0.17480041 0.02509396] global_orient0_aa(pred)=[-0.1090048 -1.7763788 -0.15125035]
+ [12/30 23:56:54][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-9.99,+2.34,+1.24) pred=(-101.99,-9.31,-0.53) pred_vs_gt=(-91.73,-11.16,-3.80)
+ [12/30 23:56:54][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+94.23
+ [12/30 23:57:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:57:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:57:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:57:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/30 23:57:45][INFO] βœ…[FIT][Epoch 0] finished! 01:09β†’04:38 | loss_epoch=24.5
+ [12/30 23:58:35][INFO] [Exp Name]: finetune_
+ [12/30 23:58:35][INFO] [GPU x Batch] = 1 x 1
+ [12/30 23:58:35][INFO] [UnityDataset] Found 1 sequences.
+ [12/30 23:58:35][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:58:35][INFO] [Train Dataset][All]: ConcatDataset size=1
+ [12/30 23:58:35][INFO]
+ [12/30 23:58:35][INFO] [UnityDataset] Found 1 sequences.
+ [12/30 23:58:35][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/30 23:58:35][INFO]
+ [12/30 23:58:44][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/30 23:59:06][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_7/checkpoints'
+ [12/30 23:59:18][INFO] Start Fitting...
+ [12/30 23:59:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/30 23:59:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:59:20][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/30 23:59:20][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/30 23:59:22][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/30 23:59:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/30 23:59:24][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/30 23:59:36][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 root_y0: gt=+0.9944 pred=+0.9685 delta(pred-gt)=-0.0259
+ [12/30 23:59:36][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 global_orient0_aa(gt)=[0.02056097 0.18737577 0.01068786] global_orient0_aa(pred)=[ 0.0337113 -2.8594027 -0.01747983]
+ [12/30 23:59:36][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 global_orient0_yxz_deg gt=(+10.74,+1.11,+0.72) pred=(-163.84,-0.50,-1.42) pred_vs_gt=(-174.54,-1.98,-1.80)
+ [12/30 23:59:36][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 yaw0_deg(pred_vs_gt)=+174.27
+ [12/31 02:50:01][INFO] [Exp Name]: finetune_
+ [12/31 02:50:01][INFO] [GPU x Batch] = 1 x 1
+ [12/31 02:50:01][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 02:50:01][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 02:50:01][INFO] [Train Dataset][All]: ConcatDataset size=1
+ [12/31 02:50:01][INFO]
+ [12/31 02:50:01][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 02:50:01][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 02:50:01][INFO]
+ [12/31 02:50:07][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/31 02:50:28][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_8/checkpoints'
+ [12/31 02:50:41][INFO] Start Fitting...
+ [12/31 02:50:42][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/31 02:50:42][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 02:50:42][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 02:50:42][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/31 02:50:43][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/31 02:50:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/31 02:50:45][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/31 02:50:55][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9875 pred=+0.9726 delta(pred-gt)=-0.0149
+ [12/31 02:50:55][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.01689476 -0.20703591 0.01797612] global_orient0_aa(pred)=[-0.0321125 -2.8486555 -0.07525362]
+ [12/31 02:50:55][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-11.85,+1.07,+0.92) pred=(-163.30,-3.15,+0.83) pred_vs_gt=(-151.41,-4.11,-0.96)
+ [12/31 02:50:55][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+151.99
+ [12/31 02:51:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 02:51:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 02:51:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 02:51:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 02:51:41][INFO] βœ…[FIT][Epoch 0] finished! 01:00β†’04:01 | loss_epoch=12.6
+ [12/31 03:10:22][INFO] [Exp Name]: finetune_
+ [12/31 03:10:22][INFO] [GPU x Batch] = 1 x 1
+ [12/31 03:10:22][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 03:10:22][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 03:10:22][INFO] [Train Dataset][All]: ConcatDataset size=1
+ [12/31 03:10:22][INFO]
+ [12/31 03:10:22][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 03:10:22][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 03:10:22][INFO]
+ [12/31 03:10:28][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/31 03:10:51][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_9/checkpoints'
+ [12/31 03:11:01][INFO] Start Fitting...
+ [12/31 03:11:03][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/31 03:11:03][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 03:11:03][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 03:11:03][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/31 03:11:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/31 03:11:05][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/31 03:11:05][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/31 03:11:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9875 pred=+0.9726 delta(pred-gt)=-0.0149
+ [12/31 03:11:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.02646996 2.9343371 -0.02487765] global_orient0_aa(pred)=[-0.03202499 -2.848779 -0.0755955 ]
+ [12/31 03:11:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(+168.15,+1.07,+0.92) pred=(-163.31,-3.16,+0.82) pred_vs_gt=(+28.58,+4.12,+0.97)
+ [12/31 03:11:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=-28.01
+ [12/31 03:12:02][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 03:12:02][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 03:12:02][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 03:12:02][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 03:12:02][INFO] βœ…[FIT][Epoch 0] finished! 01:00β†’04:01 | loss_epoch=14.2
+ [12/31 03:16:57][INFO] [Exp Name]: finetune_
+ [12/31 03:16:57][INFO] [GPU x Batch] = 1 x 1
+ [12/31 03:16:57][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 03:16:57][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 03:16:57][INFO] [Train Dataset][All]: ConcatDataset size=1
+ [12/31 03:16:57][INFO]
+ [12/31 03:16:57][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 03:16:57][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 03:16:57][INFO]
+ [12/31 03:17:04][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/31 03:17:24][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_10/checkpoints'
+ [12/31 03:17:36][INFO] Start Fitting...
+ [12/31 03:17:38][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/31 03:17:38][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 03:17:38][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 03:17:38][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/31 03:17:40][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/31 03:17:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/31 03:17:41][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/31 03:17:52][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 root_y0: gt=+0.9944 pred=+0.9686 delta(pred-gt)=-0.0258
+ [12/31 03:17:52][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 global_orient0_aa(gt)=[-0.01583315 -2.9540217 0.03045931] global_orient0_aa(pred)=[ 0.03382589 -2.8592563 -0.01758517]
+ [12/31 03:17:52][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 global_orient0_yxz_deg gt=(-169.26,+1.11,+0.72) pred=(-163.83,-0.50,-1.43) pred_vs_gt=(+5.47,+1.99,+1.81)
+ [12/31 03:17:52][INFO] [VisUnityVal] e000_100_biboo_birthday_speech_explosion_1 yaw0_deg(pred_vs_gt)=-5.75
+ [12/31 03:18:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 03:18:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 03:18:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 03:18:36][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
3698
+
3699
+ [12/31 03:18:36][INFO] βœ…[FIT][Epoch 0] finished! 00:58β†’03:55 | loss_epoch=23
3700
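The four `sync_dist` recommendations are harmless on this 1-GPU run, but they point at a real multi-GPU caveat: epoch-level metrics logged without `sync_dist=True` are not reduced across ranks. The fix is the one Lightning suggests, applied wherever the Unity validation metrics are logged (a sketch; the actual call sites live in the model code):

# Inside the LightningModule, where pa_mpjpe/mpjpe/pve/accel are computed:
for name, value in {"pa_mpjpe": pa_mpjpe, "mpjpe": mpjpe,
                    "pve": pve, "accel": accel}.items():
    self.log(f"val_metric_Unity/{name}", value, on_epoch=True, sync_dist=True)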
+ [12/31 06:06:15][INFO] [Exp Name]: finetune_
+ [12/31 06:06:15][INFO] [GPU x Batch] = 1 x 1
+ [12/31 06:06:15][INFO] [UnityDataset] Found 3 sequences.
+ [12/31 06:06:15][INFO] [Train Dataset][9/9]: name=unity, size=3, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 06:06:15][INFO] [Train Dataset][All]: ConcatDataset size=3
+ [12/31 06:06:15][INFO]
+ [12/31 06:06:15][INFO] [UnityDataset] Found 3 sequences.
+ [12/31 06:06:15][INFO] [Val Dataset][7/7]: name=unity_val, size=3, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 06:06:15][INFO]
+ [12/31 06:06:21][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/31 06:06:49][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_11/checkpoints'
+ [12/31 06:07:02][INFO] Start Fitting...
+ [12/31 06:07:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/31 06:07:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 06:07:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
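The dataloader warnings recur on every run because the loaders are created with few or zero workers. With only 1-3 short sequences the impact is negligible, but the suggested remedy is the standard `DataLoader` argument (a generic sketch; the real loaders are built from the Hydra config):

from torch.utils.data import DataLoader

loader = DataLoader(
    dataset,                  # e.g. the UnityDataset / ConcatDataset above
    batch_size=1,
    num_workers=11,           # the value the warning suggests for this machine
    persistent_workers=True,  # keep workers alive across epochs
    pin_memory=True,
)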
+ [12/31 06:07:04][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/31 06:07:07][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/31 06:07:09][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/31 06:07:11][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/31 06:07:22][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9820 pred=+0.9698 delta(pred-gt)=-0.0123
+ [12/31 06:07:22][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.03590106 -0.17807975 0.02012725] global_orient0_aa(pred)=[-0.08420898 -2.6493108 -0.07150012]
+ [12/31 06:07:22][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-10.19,+2.15,+0.96) pred=(-151.99,-3.76,+2.70) pred_vs_gt=(-141.82,-6.13,+0.67)
+ [12/31 06:07:22][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+144.65
+ [12/31 06:08:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:08:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:08:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:08:17][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:08:17][INFO] βœ…[FIT][Epoch 0] finished! 01:14β†’04:57 | loss_epoch=41.9
+ [12/31 06:13:34][INFO] [Exp Name]: finetune_
+ [12/31 06:13:34][INFO] [GPU x Batch] = 1 x 1
+ [12/31 06:13:34][INFO] [UnityDataset] Found 3 sequences.
+ [12/31 06:13:34][INFO] [Train Dataset][9/9]: name=unity, size=3, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 06:13:34][INFO] [Train Dataset][All]: ConcatDataset size=3
+ [12/31 06:13:34][INFO]
+ [12/31 06:13:34][INFO] [UnityDataset] Found 3 sequences.
+ [12/31 06:13:34][INFO] [Val Dataset][7/7]: name=unity_val, size=3, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 06:13:34][INFO]
+ [12/31 06:13:43][INFO] [Exp Name]: finetune_
+ [12/31 06:13:43][INFO] [GPU x Batch] = 1 x 1
+ [12/31 06:13:43][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 06:13:43][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 06:13:43][INFO] [Train Dataset][All]: ConcatDataset size=1
+ [12/31 06:13:43][INFO]
+ [12/31 06:13:43][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 06:13:43][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 06:13:43][INFO]
+ [12/31 06:13:48][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/31 06:14:11][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_12/checkpoints'
+ [12/31 06:14:22][INFO] Start Fitting...
+ [12/31 06:14:26][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/31 06:14:26][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 06:14:26][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 06:14:26][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/31 06:14:28][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/31 06:14:30][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/31 06:14:30][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/31 06:14:41][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9875 pred=+0.9726 delta(pred-gt)=-0.0149
+ [12/31 06:14:41][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.01689476 -0.20703594 0.01797612] global_orient0_aa(pred)=[-0.03202499 -2.848779 -0.0755955 ]
+ [12/31 06:14:41][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(-11.85,+1.07,+0.92) pred=(-163.31,-3.16,+0.82) pred_vs_gt=(-151.42,-4.12,-0.97)
+ [12/31 06:14:41][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=+151.99
+ [12/31 06:15:23][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:15:23][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:15:23][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:15:23][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:15:23][INFO] βœ…[FIT][Epoch 0] finished! 01:00β†’04:00 | loss_epoch=14.3
+ [12/31 06:19:20][INFO] [Exp Name]: finetune_
+ [12/31 06:19:20][INFO] [GPU x Batch] = 1 x 1
+ [12/31 06:19:20][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 06:19:20][INFO] [Train Dataset][9/9]: name=unity, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 06:19:20][INFO] [Train Dataset][All]: ConcatDataset size=1
+ [12/31 06:19:20][INFO]
+ [12/31 06:19:20][INFO] [UnityDataset] Found 1 sequences.
+ [12/31 06:19:20][INFO] [Val Dataset][7/7]: name=unity_val, size=1, genmo.datasets.unity_dataset.UnityDataset
+ [12/31 06:19:20][INFO]
+ [12/31 06:19:26][INFO] [PL-Trainer] Loading ckpt: ./s050000.ckpt
+ [12/31 06:19:48][INFO] [Simple Ckpt Saver]: Save to `outputs/unity/finetune_/version_13/checkpoints'
+ [12/31 06:19:59][INFO] Start Fitting...
+ [12/31 06:20:01][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
+
+ [12/31 06:20:01][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 06:20:01][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
+
+ [12/31 06:20:01][INFO] πŸš€[FIT][Epoch 0] Data: unity Experiment: finetune_
+ [12/31 06:20:04][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ [12/31 06:20:06][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/autograd/graph.py:744: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+
+ [12/31 06:20:06][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+ warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
+
+ [12/31 06:20:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech root_y0: gt=+0.9875 pred=+0.9726 delta(pred-gt)=-0.0149
+ [12/31 06:20:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_aa(gt)=[ 0.02646996 2.9343371 -0.02487765] global_orient0_aa(pred)=[-0.03202499 -2.848779 -0.0755955 ]
+ [12/31 06:20:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech global_orient0_yxz_deg gt=(+168.15,+1.07,+0.92) pred=(-163.31,-3.16,+0.82) pred_vs_gt=(+28.58,+4.12,+0.97)
+ [12/31 06:20:16][INFO] [VisUnityVal] e000_0_biboo_birthday_speech yaw0_deg(pred_vs_gt)=-28.01
+ [12/31 06:20:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pa_mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:20:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/mpjpe', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:20:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/pve', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:20:59][WARNING] /root/miniconda3/envs/gvhmr/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:433: It is recommended to use `self.log('val_metric_Unity/accel', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
+
+ [12/31 06:20:59][INFO] βœ…[FIT][Epoch 0] finished! 00:59β†’03:56 | loss_epoch=14.2
train.sh CHANGED
@@ -1 +1,9 @@
- python scripts/train.py --config-name finetune_unity ckpt_path=./s050000.ckpt
+ #!/usr/bin/env bash
+ set -euo pipefail
+
+ # Make sure local repo modules (incl. `third_party/*`) are importable.
+ export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"
+ # GVHMR uses absolute imports like `import hmr4d...` internally.
+ export PYTHONPATH="$(pwd)/third_party/GVHMR${PYTHONPATH:+:$PYTHONPATH}"
+
+ python scripts/train.py --config-name finetune_unity ckpt_path=./s050000.ckpt
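A quick way to confirm the two PYTHONPATH exports took effect is to try the imports the comment mentions before launching training (a hypothetical check, not part of the commit):

# Run from the repo root with the same exports in the environment:
import genmo, hmr4d  # hmr4d should resolve from third_party/GVHMR
print("genmo ->", genmo.__file__)
print("hmr4d ->", hmr4d.__file__)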