diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..bda7aa7ca3cd0656ed012ffc0d12a208e4ef2086 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+WiLoR/assets/teaser.png filter=lfs diff=lfs merge=lfs -text
+WiLoR/demo_img/test2.png filter=lfs diff=lfs merge=lfs -text
+WiLoR/demo_img/test4.jpg filter=lfs diff=lfs merge=lfs -text
+WiLoR/demo_img/test5.jpeg filter=lfs diff=lfs merge=lfs -text
+WiLoR/demo_img/test6.jpg filter=lfs diff=lfs merge=lfs -text
+WiLoR/demo_img/test8.jpg filter=lfs diff=lfs merge=lfs -text
diff --git a/WiLoR/README.md b/WiLoR/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..534cd597427b6d1e815bc1c25a97569fe706d1f5
--- /dev/null
+++ b/WiLoR/README.md
@@ -0,0 +1,93 @@
+
+
+[](https://paperswithcode.com/sota/3d-hand-pose-estimation-on-freihand?p=wilor-end-to-end-3d-hand-localization-and)
+[](https://paperswithcode.com/sota/3d-hand-pose-estimation-on-ho-3d?p=wilor-end-to-end-3d-hand-localization-and)
+
+
+
+This is the official implementation of **[WiLoR](https://rolpotamias.github.io/WiLoR/)**, a state-of-the-art hand localization and reconstruction model:
+
+
+
+## Installation
+### [Update] Quick Installation
+Thanks to [@warmshao](https://github.com/warmshao) WiLoR can now be installed using a single pip command:
+```
+pip install git+https://github.com/warmshao/WiLoR-mini
+```
+Please head to [WiLoR-mini](https://github.com/warmshao/WiLoR-mini) for additional details.
+
+**Note:** the above code is a simplified version of WiLoR and can be used for demo only.
+If you wish to use WiLoR for other tasks it is suggested to follow the original installation instructions below:
+### Original Installation
+```
+git clone --recursive https://github.com/rolpotamias/WiLoR.git
+cd WiLoR
+```
+
+The code has been tested with PyTorch 2.0.0 and CUDA 11.7. It is suggested to use an anaconda environment to install the required dependencies:
+```bash
+conda create --name wilor python=3.10
+conda activate wilor
+
+pip install torch torchvision --index-url https://download.pytorch.org/whl/cu117
+# Install requirements
+pip install -r requirements.txt
+```
+Download the pretrained models using:
+```bash
+wget https://huggingface.co/spaces/rolpotamias/WiLoR/resolve/main/pretrained_models/detector.pt -P ./pretrained_models/
+wget https://huggingface.co/spaces/rolpotamias/WiLoR/resolve/main/pretrained_models/wilor_final.ckpt -P ./pretrained_models/
+```
+It is also required to download MANO model from [MANO website](https://mano.is.tue.mpg.de).
+Create an account by clicking Sign Up and download the models (mano_v*_*.zip). Unzip and place the right hand model `MANO_RIGHT.pkl` under the `mano_data/` folder.
+Note that MANO model falls under the [MANO license](https://mano.is.tue.mpg.de/license.html).
+## Demo
+```bash
+python demo.py --img_folder demo_img --out_folder demo_out --save_mesh
+```
+## Start a local gradio demo
+You can start a local demo for inference by running:
+```bash
+python gradio_demo.py
+```
+## WHIM Dataset
+To download WHIM dataset please follow the instructions [here](./whim/Dataset_instructions.md)
+
+## Acknowledgements
+Parts of the code are taken or adapted from the following repos:
+- [HaMeR](https://github.com/geopavlakos/hamer/)
+- [Ultralytics](https://github.com/ultralytics/ultralytics)
+
+## License
+WiLoR models fall under the [CC-BY-NC-ND License](./license.txt). This repository also depends on the [Ultralytics library](https://github.com/ultralytics/ultralytics) and the [MANO Model](https://mano.is.tue.mpg.de/license.html), which fall under their own licenses. By using this repository, you must also comply with the terms of these external licenses.
+## Citing
+If you find WiLoR useful for your research, please consider citing our paper:
+
+```bibtex
+@misc{potamias2024wilor,
+ title={WiLoR: End-to-end 3D Hand Localization and Reconstruction in-the-wild},
+ author={Rolandos Alexandros Potamias and Jinglei Zhang and Jiankang Deng and Stefanos Zafeiriou},
+ year={2024},
+ eprint={2409.12259},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
diff --git a/WiLoR/assets/teaser.png b/WiLoR/assets/teaser.png
new file mode 100644
index 0000000000000000000000000000000000000000..b30727edad1b34578698f037bca05266c56a02d4
--- /dev/null
+++ b/WiLoR/assets/teaser.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5f07ada2f470af0619716c0ce4f60d9dfd3da1673d06c28c97d85abb84eadc0
+size 9209351
diff --git a/WiLoR/demo.py b/WiLoR/demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..4502e18eb7a2f9b1e990c1456f0cfa230fc8d62d
--- /dev/null
+++ b/WiLoR/demo.py
@@ -0,0 +1,142 @@
+from pathlib import Path
+import torch
+import argparse
+import os
+import cv2
+import numpy as np
+import json
+from typing import Dict, Optional
+
+from wilor.models import WiLoR, load_wilor
+from wilor.utils import recursive_to
+from wilor.datasets.vitdet_dataset import ViTDetDataset, DEFAULT_MEAN, DEFAULT_STD
+from wilor.utils.renderer import Renderer, cam_crop_to_full
+from ultralytics import YOLO
+LIGHT_PURPLE=(0.25098039, 0.274117647, 0.65882353)
+
+def main():
+ parser = argparse.ArgumentParser(description='WiLoR demo code')
+ parser.add_argument('--img_folder', type=str, default='images', help='Folder with input images')
+ parser.add_argument('--out_folder', type=str, default='out_demo', help='Output folder to save rendered results')
+ parser.add_argument('--save_mesh', dest='save_mesh', action='store_true', default=False, help='If set, save meshes to disk also')
+ parser.add_argument('--rescale_factor', type=float, default=2.0, help='Factor for padding the bbox')
+ parser.add_argument('--file_type', nargs='+', default=['*.jpg', '*.png', '*.jpeg'], help='List of file extensions to consider')
+
+ args = parser.parse_args()
+
+ # Download and load checkpoints
+ model, model_cfg = load_wilor(checkpoint_path = './pretrained_models/wilor_final.ckpt' , cfg_path= './pretrained_models/model_config.yaml')
+ detector = YOLO('./pretrained_models/detector.pt')
+ # Setup the renderer
+ renderer = Renderer(model_cfg, faces=model.mano.faces)
+ renderer_side = Renderer(model_cfg, faces=model.mano.faces)
+
+ device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+ model = model.to(device)
+ detector = detector.to(device)
+ model.eval()
+
+ # Make output directory if it does not exist
+ os.makedirs(args.out_folder, exist_ok=True)
+
+ # Get all demo images ends with .jpg or .png
+ img_paths = [img for end in args.file_type for img in Path(args.img_folder).glob(end)]
+ # Iterate over all images in folder
+ for img_path in img_paths:
+ img_cv2 = cv2.imread(str(img_path))
+ detections = detector(img_cv2, conf = 0.3, verbose=False)[0]
+ bboxes = []
+ is_right = []
+ for det in detections:
+ Bbox = det.boxes.data.cpu().detach().squeeze().numpy()
+ is_right.append(det.boxes.cls.cpu().detach().squeeze().item())
+ bboxes.append(Bbox[:4].tolist())
+
+ if len(bboxes) == 0:
+ continue
+ boxes = np.stack(bboxes)
+ right = np.stack(is_right)
+ dataset = ViTDetDataset(model_cfg, img_cv2, boxes, right, rescale_factor=args.rescale_factor)
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=False, num_workers=0)
+
+ all_verts = []
+ all_cam_t = []
+ all_right = []
+ all_joints= []
+ all_kpts = []
+
+ for batch in dataloader:
+ batch = recursive_to(batch, device)
+
+ with torch.no_grad():
+ out = model(batch)
+
+ multiplier = (2*batch['right']-1)
+ pred_cam = out['pred_cam']
+ pred_cam[:,1] = multiplier*pred_cam[:,1]
+ box_center = batch["box_center"].float()
+ box_size = batch["box_size"].float()
+ img_size = batch["img_size"].float()
+ scaled_focal_length = model_cfg.EXTRA.FOCAL_LENGTH / model_cfg.MODEL.IMAGE_SIZE * img_size.max()
+ pred_cam_t_full = cam_crop_to_full(pred_cam, box_center, box_size, img_size, scaled_focal_length).detach().cpu().numpy()
+
+
+ # Render the result
+ batch_size = batch['img'].shape[0]
+ for n in range(batch_size):
+ # Get filename from path img_path
+ img_fn, _ = os.path.splitext(os.path.basename(img_path))
+
+ verts = out['pred_vertices'][n].detach().cpu().numpy()
+ joints = out['pred_keypoints_3d'][n].detach().cpu().numpy()
+
+ is_right = batch['right'][n].cpu().numpy()
+ verts[:,0] = (2*is_right-1)*verts[:,0]
+ joints[:,0] = (2*is_right-1)*joints[:,0]
+ cam_t = pred_cam_t_full[n]
+ kpts_2d = project_full_img(verts, cam_t, scaled_focal_length, img_size[n])
+
+ all_verts.append(verts)
+ all_cam_t.append(cam_t)
+ all_right.append(is_right)
+ all_joints.append(joints)
+ all_kpts.append(kpts_2d)
+
+
+ # Save all meshes to disk
+ if args.save_mesh:
+ camera_translation = cam_t.copy()
+ tmesh = renderer.vertices_to_trimesh(verts, camera_translation, LIGHT_PURPLE, is_right=is_right)
+ tmesh.export(os.path.join(args.out_folder, f'{img_fn}_{n}.obj'))
+
+ # Render front view
+ if len(all_verts) > 0:
+ misc_args = dict(
+ mesh_base_color=LIGHT_PURPLE,
+ scene_bg_color=(1, 1, 1),
+ focal_length=scaled_focal_length,
+ )
+ cam_view = renderer.render_rgba_multiple(all_verts, cam_t=all_cam_t, render_res=img_size[n], is_right=all_right, **misc_args)
+
+ # Overlay image
+ input_img = img_cv2.astype(np.float32)[:,:,::-1]/255.0
+ input_img = np.concatenate([input_img, np.ones_like(input_img[:,:,:1])], axis=2) # Add alpha channel
+ input_img_overlay = input_img[:,:,:3] * (1-cam_view[:,:,3:]) + cam_view[:,:,:3] * cam_view[:,:,3:]
+
+ cv2.imwrite(os.path.join(args.out_folder, f'{img_fn}.jpg'), 255*input_img_overlay[:, :, ::-1])
+
+def project_full_img(points, cam_trans, focal_length, img_res):
+ camera_center = [img_res[0] / 2., img_res[1] / 2.]
+ K = torch.eye(3)
+ K[0,0] = focal_length
+ K[1,1] = focal_length
+ K[0,2] = camera_center[0]
+ K[1,2] = camera_center[1]
+ points = points + cam_trans
+ points = points / points[..., -1:]
+
+ V_2d = (K @ points.T).T
+ return V_2d[..., :-1]
+
+if __name__ == '__main__':
+ main()
diff --git a/WiLoR/demo_img/test1.jpg b/WiLoR/demo_img/test1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3686bcedfef98e9c671df705b324e4301430ab68
Binary files /dev/null and b/WiLoR/demo_img/test1.jpg differ
diff --git a/WiLoR/demo_img/test2.png b/WiLoR/demo_img/test2.png
new file mode 100644
index 0000000000000000000000000000000000000000..93f1ec0a4261e73987381685db6c4c895dfd7009
--- /dev/null
+++ b/WiLoR/demo_img/test2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:589f5d12593acbcbcb9ec07b288b04f6d7e70542e1312ceee3ea992ba0f41ff9
+size 1009481
diff --git a/WiLoR/demo_img/test3.jpg b/WiLoR/demo_img/test3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7f291937611596d24c2fea1c3e84a57226c602a9
Binary files /dev/null and b/WiLoR/demo_img/test3.jpg differ
diff --git a/WiLoR/demo_img/test4.jpg b/WiLoR/demo_img/test4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..92f67dc189a433f3191a6ea77f105c22cbe3741d
--- /dev/null
+++ b/WiLoR/demo_img/test4.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efb16543caa936aa671ad1cb28ca2c6129ba8cba58d08476ed9538fd12de9265
+size 315497
diff --git a/WiLoR/demo_img/test5.jpeg b/WiLoR/demo_img/test5.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..532de1b7f718b67fec1cc5601f5787b89d45f782
--- /dev/null
+++ b/WiLoR/demo_img/test5.jpeg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84d161aa4f1a335ec3971c5d050338e7c13b9e3c90231c0de7e677094a172eae
+size 206645
diff --git a/WiLoR/demo_img/test6.jpg b/WiLoR/demo_img/test6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..db72443d1d468f650b148cdfc2af9c5d1461ebbe
--- /dev/null
+++ b/WiLoR/demo_img/test6.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:617a3a3d04a1e17e4285dab5bca2003080923df66953df93c85ddfdaa383e8f5
+size 107026
diff --git a/WiLoR/demo_img/test7.jpg b/WiLoR/demo_img/test7.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c48a1ee2a5635939ca0ad617d23a8523ed4fd519
Binary files /dev/null and b/WiLoR/demo_img/test7.jpg differ
diff --git a/WiLoR/demo_img/test8.jpg b/WiLoR/demo_img/test8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8e9a511e90c7880561df526f8145addf5bb7cd3f
--- /dev/null
+++ b/WiLoR/demo_img/test8.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:886ef1a8981bef175707353b2adea60168657a926c1dd5a95789c4907d881907
+size 397960
diff --git a/WiLoR/download_videos.py b/WiLoR/download_videos.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c1700deae54548e9c5842c07b3ec9737c9b4d35
--- /dev/null
+++ b/WiLoR/download_videos.py
@@ -0,0 +1,58 @@
+import os
+import json
+import numpy as np
+import argparse
+from pytubefix import YouTube
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument("--root", type=str, help="Directory of WiLoR")
+parser.add_argument("--mode", type=str, choices=['train', 'test'], default= 'train', help="Train/Test set")
+
+args = parser.parse_args()
+
+with open(os.path.join(args.root, f'./whim/{args.mode}_video_ids.json')) as f:
+ video_dict = json.load(f)
+
+Video_IDs = video_dict.keys()
+failed_IDs = []
+os.makedirs(os.path.join(args.root, 'Videos'), exist_ok=True)
+
+for Video_ID in Video_IDs:
+ res = video_dict[Video_ID]['res'][0]
+ try:
+ YouTube('https://youtu.be/'+Video_ID).streams.filter(only_video=True,
+ file_extension='mp4',
+ res =f'{res}p'
+ ).order_by('resolution').desc().first().download(
+ output_path=os.path.join(args.root, 'Videos') ,
+ filename = Video_ID +'.mp4')
+ except:
+ print(f'Failed {Video_ID}')
+ failed_IDs.append(Video_ID)
+ continue
+
+
+ cap = cv2.VideoCapture(os.path.join(args.root, 'Videos', Video_ID + '.mp4'))
+ if (cap.isOpened()== False):
+ print(f"Error opening video stream {os.path.join(args.root, 'Videos', Video_ID + '.mp4')}")
+
+ VIDEO_LEN = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ fps = cap.get(cv2.CAP_PROP_FPS)
+
+ fps_org = video_dict[Video_ID]['fps']
+ fps_rate = round(fps / fps_org)
+
+ all_frames = os.listdir(os.path.join(args.root, 'WHIM', args.mode, 'anno', Video_ID))
+
+ for frame in all_frames:
+ frame_gt = int(frame[:-4])
+ frame_idx = (frame_gt * fps_rate)
+
+ cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+ ret, img_cv2 = cap.read()
+
+ cv2.imwrite(os.path.join(args.root, 'WHIM', args.mode, 'anno', Video_ID, frame +'.jpg' ), img_cv2.astype(np.float32))
+
+np.save(os.path.join(args.root, 'failed_videos.npy'), failed_IDs)
diff --git a/WiLoR/gradio_demo.py b/WiLoR/gradio_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3784944e81fa6f0b2738d6849109e448255193a
--- /dev/null
+++ b/WiLoR/gradio_demo.py
@@ -0,0 +1,192 @@
+import os
+import sys
+os.environ["PYOPENGL_PLATFORM"] = "egl"
+os.environ["MESA_GL_VERSION_OVERRIDE"] = "4.1"
+# os.system('pip install /home/user/app/pyrender')
+# sys.path.append('/home/user/app/pyrender')
+
+import gradio as gr
+#import spaces
+import cv2
+import numpy as np
+import torch
+from ultralytics import YOLO
+from pathlib import Path
+import argparse
+import json
+from typing import Dict, Optional
+
+from wilor.models import WiLoR, load_wilor
+from wilor.utils import recursive_to
+from wilor.datasets.vitdet_dataset import ViTDetDataset, DEFAULT_MEAN, DEFAULT_STD
+from wilor.utils.renderer import Renderer, cam_crop_to_full
+device = torch.device('cpu') if torch.cuda.is_available() else torch.device('cuda')
+
+LIGHT_PURPLE=(0.25098039, 0.274117647, 0.65882353)
+
+model, model_cfg = load_wilor(checkpoint_path = './pretrained_models/wilor_final.ckpt' , cfg_path= './pretrained_models/model_config.yaml')
+# Setup the renderer
+renderer = Renderer(model_cfg, faces=model.mano.faces)
+model = model.to(device)
+model.eval()
+
+detector = YOLO(f'./pretrained_models/detector.pt').to(device)
+
+def render_reconstruction(image, conf, IoU_threshold=0.3):
+ input_img, num_dets, reconstructions = run_wilow_model(image, conf, IoU_threshold=0.5)
+ if num_dets> 0:
+ # Render front view
+
+ misc_args = dict(
+ mesh_base_color=LIGHT_PURPLE,
+ scene_bg_color=(1, 1, 1),
+ focal_length=reconstructions['focal'],
+ )
+
+ cam_view = renderer.render_rgba_multiple(reconstructions['verts'],
+ cam_t=reconstructions['cam_t'],
+ render_res=reconstructions['img_size'],
+ is_right=reconstructions['right'], **misc_args)
+
+ # Overlay image
+
+ input_img = np.concatenate([input_img, np.ones_like(input_img[:,:,:1])], axis=2) # Add alpha channel
+ input_img_overlay = input_img[:,:,:3] * (1-cam_view[:,:,3:]) + cam_view[:,:,:3] * cam_view[:,:,3:]
+
+ return input_img_overlay, f'{num_dets} hands detected'
+ else:
+ return input_img, f'{num_dets} hands detected'
+
+#@spaces.GPU()
+def run_wilow_model(image, conf, IoU_threshold=0.5):
+    """Detect hands in `image`, draw their boxes, and reconstruct a 3D mesh per hand.
+
+    Args:
+        image: input RGB image as a numpy array (H, W, 3) from gradio.
+        conf: YOLO detection confidence threshold.
+        IoU_threshold: YOLO NMS IoU threshold.
+
+    Returns:
+        (annotated input image as float32 scaled to [0, 1],
+         number of detections,
+         dict with keys 'verts', 'cam_t', 'right', 'img_size', 'focal'
+         when hands were found, else None)
+    """
+    # gradio supplies RGB; flip channel order to BGR for the detector pipeline.
+    img_cv2 = image[...,::-1]
+    img_vis = image.copy()
+
+    detections = detector(img_cv2, conf=conf, verbose=False, iou=IoU_threshold)[0]
+
+    bboxes = []
+    is_right = []
+    for det in detections:
+        Bbox = det.boxes.data.cpu().detach().squeeze().numpy()
+        Conf = det.boxes.conf.data.cpu().detach()[0].numpy().reshape(-1).astype(np.float16)
+        # Detector class id encodes handedness: 0. = left, otherwise right.
+        Side = det.boxes.cls.data.cpu().detach()
+        #Bbox[:2] -= np.int32(0.1 * Bbox[:2])
+        #Bbox[2:] += np.int32(0.1 * Bbox[ 2:])
+        is_right.append(det.boxes.cls.cpu().detach().squeeze().item())
+        bboxes.append(Bbox[:4].tolist())
+
+        # Draw a labelled box: teal for left hands, yellow for right hands.
+        color = (255*0.208, 255*0.647 ,255*0.603 ) if Side==0. else (255*1, 255*0.78039, 255*0.2353)
+        label = f'L - {Conf[0]:.3f}' if Side==0 else f'R - {Conf[0]:.3f}'
+
+        cv2.rectangle(img_vis, (int(Bbox[0]), int(Bbox[1])), (int(Bbox[2]), int(Bbox[3])), color , 3)
+        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
+        cv2.rectangle(img_vis, (int(Bbox[0]), int(Bbox[1]) - 20), (int(Bbox[0]) + w, int(Bbox[1])), color, -1)
+        cv2.putText(img_vis, label, (int(Bbox[0]), int(Bbox[1]) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,0), 2)
+
+    if len(bboxes) != 0:
+        boxes = np.stack(bboxes)
+        right = np.stack(is_right)
+        # Crop/normalise each detection for the ViT backbone.
+        dataset = ViTDetDataset(model_cfg, img_cv2, boxes, right, rescale_factor=2.0 )
+        dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=0)
+
+        all_verts = []
+        all_cam_t = []
+        all_right = []
+        all_joints= []
+
+        for batch in dataloader:
+            batch = recursive_to(batch, device)
+
+            with torch.no_grad():
+                out = model(batch)
+
+            # Left hands are processed mirrored; flip the camera x-offset back.
+            multiplier = (2*batch['right']-1)
+            pred_cam = out['pred_cam']
+            pred_cam[:,1] = multiplier*pred_cam[:,1]
+            box_center = batch["box_center"].float()
+            box_size = batch["box_size"].float()
+            img_size = batch["img_size"].float()
+            scaled_focal_length = model_cfg.EXTRA.FOCAL_LENGTH / model_cfg.MODEL.IMAGE_SIZE * img_size.max()
+            # Convert crop-space weak-perspective camera into a full-image translation.
+            pred_cam_t_full = cam_crop_to_full(pred_cam, box_center, box_size, img_size, scaled_focal_length).detach().cpu().numpy()
+
+
+            batch_size = batch['img'].shape[0]
+            for n in range(batch_size):
+
+                verts = out['pred_vertices'][n].detach().cpu().numpy()
+                joints = out['pred_keypoints_3d'][n].detach().cpu().numpy()
+
+                # Mirror left-hand geometry back to its true orientation.
+                is_right = batch['right'][n].cpu().numpy()
+                verts[:,0] = (2*is_right-1)*verts[:,0]
+                joints[:,0] = (2*is_right-1)*joints[:,0]
+
+                cam_t = pred_cam_t_full[n]
+
+                all_verts.append(verts)
+                all_cam_t.append(cam_t)
+                all_right.append(is_right)
+                all_joints.append(joints)
+
+        # NOTE(review): `img_size` and `n` leak from the loops above — this
+        # assumes every crop shares the same full-image size, which holds
+        # because all crops come from the single input image.
+        reconstructions = {'verts': all_verts, 'cam_t': all_cam_t, 'right': all_right, 'img_size': img_size[n], 'focal': scaled_focal_length}
+        return img_vis.astype(np.float32)/255.0, len(detections), reconstructions
+    else:
+        return img_vis.astype(np.float32)/255.0, len(detections), None
+
+
+
+header = ('''
+
+

+

+

+

+''')
+
+
+# Gradio UI: image input + confidence slider on the left, rendered
+# reconstruction and detection count on the right, example images below.
+with gr.Blocks(title="WiLoR: End-to-end 3D hand localization and reconstruction in-the-wild", css=".gradio-container") as demo:
+
+    # Project header / badges rendered at the top of the page.
+    gr.Markdown(header)
+
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(label="Input image", type="numpy")
+            threshold = gr.Slider(value=0.3, minimum=0.05, maximum=0.95, step=0.05, label='Detection Confidence Threshold')
+            #nms = gr.Slider(value=0.5, minimum=0.05, maximum=0.95, step=0.05, label='IoU NMS Threshold')
+            submit = gr.Button("Submit", variant="primary")
+
+
+        with gr.Column():
+            reconstruction = gr.Image(label="Reconstructions", type="numpy")
+            hands_detected = gr.Textbox(label="Hands Detected")
+
+        # Run detection + reconstruction on click and fill both outputs.
+        submit.click(fn=render_reconstruction, inputs=[input_image, threshold], outputs=[reconstruction, hands_detected])
+
+    with gr.Row():
+        example_images = gr.Examples([
+
+            ['./demo_img/test1.jpg'],
+            ['./demo_img/test2.png'],
+            ['./demo_img/test3.jpg'],
+            ['./demo_img/test4.jpg'],
+            ['./demo_img/test5.jpeg'],
+            ['./demo_img/test6.jpg'],
+            ['./demo_img/test7.jpg'],
+            ['./demo_img/test8.jpg'],
+            ],
+            inputs=input_image)
+
+demo.launch()
diff --git a/WiLoR/license.txt b/WiLoR/license.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0e3ff6454d647f81609e934bb8faa4eb5a5396da
--- /dev/null
+++ b/WiLoR/license.txt
@@ -0,0 +1,402 @@
+Attribution-NonCommercial-NoDerivatives 4.0 International
+
+=======================================================================
+
+Creative Commons Corporation ("Creative Commons") is not a law firm and
+does not provide legal services or legal advice. Distribution of
+Creative Commons public licenses does not create a lawyer-client or
+other relationship. Creative Commons makes its licenses and related
+information available on an "as-is" basis. Creative Commons gives no
+warranties regarding its licenses, any material licensed under their
+terms and conditions, or any related information. Creative Commons
+disclaims all liability for damages resulting from their use to the
+fullest extent possible.
+
+Using Creative Commons Public Licenses
+
+Creative Commons public licenses provide a standard set of terms and
+conditions that creators and other rights holders may use to share
+original works of authorship and other material subject to copyright
+and certain other rights specified in the public license below. The
+following considerations are for informational purposes only, are not
+exhaustive, and do not form part of our licenses.
+
+ Considerations for licensors: Our public licenses are
+ intended for use by those authorized to give the public
+ permission to use material in ways otherwise restricted by
+ copyright and certain other rights. Our licenses are
+ irrevocable. Licensors should read and understand the terms
+ and conditions of the license they choose before applying it.
+ Licensors should also secure all rights necessary before
+ applying our licenses so that the public can reuse the
+ material as expected. Licensors should clearly mark any
+ material not subject to the license. This includes other CC-
+ licensed material, or material used under an exception or
+ limitation to copyright. More considerations for licensors:
+ wiki.creativecommons.org/Considerations_for_licensors
+
+ Considerations for the public: By using one of our public
+ licenses, a licensor grants the public permission to use the
+ licensed material under specified terms and conditions. If
+ the licensor's permission is not necessary for any reason--for
+ example, because of any applicable exception or limitation to
+ copyright--then that use is not regulated by the license. Our
+ licenses grant only permissions under copyright and certain
+ other rights that a licensor has authority to grant. Use of
+ the licensed material may still be restricted for other
+ reasons, including because others have copyright or other
+ rights in the material. A licensor may make special requests,
+ such as asking that all changes be marked or described.
+ Although not required by our licenses, you are encouraged to
+ respect those requests where reasonable. More considerations
+ for the public:
+ wiki.creativecommons.org/Considerations_for_licensees
+
+=======================================================================
+
+Creative Commons Attribution-NonCommercial-NoDerivatives 4.0
+International Public License
+
+By exercising the Licensed Rights (defined below), You accept and agree
+to be bound by the terms and conditions of this Creative Commons
+Attribution-NonCommercial-NoDerivatives 4.0 International Public
+License ("Public License"). To the extent this Public License may be
+interpreted as a contract, You are granted the Licensed Rights in
+consideration of Your acceptance of these terms and conditions, and the
+Licensor grants You such rights in consideration of benefits the
+Licensor receives from making the Licensed Material available under
+these terms and conditions.
+
+
+Section 1 -- Definitions.
+
+ a. Adapted Material means material subject to Copyright and Similar
+ Rights that is derived from or based upon the Licensed Material
+ and in which the Licensed Material is translated, altered,
+ arranged, transformed, or otherwise modified in a manner requiring
+ permission under the Copyright and Similar Rights held by the
+ Licensor. For purposes of this Public License, where the Licensed
+ Material is a musical work, performance, or sound recording,
+ Adapted Material is always produced where the Licensed Material is
+ synched in timed relation with a moving image.
+
+ b. Copyright and Similar Rights means copyright and/or similar rights
+ closely related to copyright including, without limitation,
+ performance, broadcast, sound recording, and Sui Generis Database
+ Rights, without regard to how the rights are labeled or
+ categorized. For purposes of this Public License, the rights
+ specified in Section 2(b)(1)-(2) are not Copyright and Similar
+ Rights.
+
+ c. Effective Technological Measures means those measures that, in the
+ absence of proper authority, may not be circumvented under laws
+ fulfilling obligations under Article 11 of the WIPO Copyright
+ Treaty adopted on December 20, 1996, and/or similar international
+ agreements.
+
+ d. Exceptions and Limitations means fair use, fair dealing, and/or
+ any other exception or limitation to Copyright and Similar Rights
+ that applies to Your use of the Licensed Material.
+
+ e. Licensed Material means the artistic or literary work, database,
+ or other material to which the Licensor applied this Public
+ License.
+
+ f. Licensed Rights means the rights granted to You subject to the
+ terms and conditions of this Public License, which are limited to
+ all Copyright and Similar Rights that apply to Your use of the
+ Licensed Material and that the Licensor has authority to license.
+
+ g. Licensor means the individual(s) or entity(ies) granting rights
+ under this Public License.
+
+ h. NonCommercial means not primarily intended for or directed towards
+ commercial advantage or monetary compensation. For purposes of
+ this Public License, the exchange of the Licensed Material for
+ other material subject to Copyright and Similar Rights by digital
+ file-sharing or similar means is NonCommercial provided there is
+ no payment of monetary compensation in connection with the
+ exchange.
+
+ i. Share means to provide material to the public by any means or
+ process that requires permission under the Licensed Rights, such
+ as reproduction, public display, public performance, distribution,
+ dissemination, communication, or importation, and to make material
+ available to the public including in ways that members of the
+ public may access the material from a place and at a time
+ individually chosen by them.
+
+ j. Sui Generis Database Rights means rights other than copyright
+ resulting from Directive 96/9/EC of the European Parliament and of
+ the Council of 11 March 1996 on the legal protection of databases,
+ as amended and/or succeeded, as well as other essentially
+ equivalent rights anywhere in the world.
+
+ k. You means the individual or entity exercising the Licensed Rights
+ under this Public License. Your has a corresponding meaning.
+
+
+Section 2 -- Scope.
+
+ a. License grant.
+
+ 1. Subject to the terms and conditions of this Public License,
+ the Licensor hereby grants You a worldwide, royalty-free,
+ non-sublicensable, non-exclusive, irrevocable license to
+ exercise the Licensed Rights in the Licensed Material to:
+
+ a. reproduce and Share the Licensed Material, in whole or
+ in part, for NonCommercial purposes only; and
+
+ b. produce and reproduce, but not Share, Adapted Material
+ for NonCommercial purposes only.
+
+ 2. Exceptions and Limitations. For the avoidance of doubt, where
+ Exceptions and Limitations apply to Your use, this Public
+ License does not apply, and You do not need to comply with
+ its terms and conditions.
+
+ 3. Term. The term of this Public License is specified in Section
+ 6(a).
+
+ 4. Media and formats; technical modifications allowed. The
+ Licensor authorizes You to exercise the Licensed Rights in
+ all media and formats whether now known or hereafter created,
+ and to make technical modifications necessary to do so. The
+ Licensor waives and/or agrees not to assert any right or
+ authority to forbid You from making technical modifications
+ necessary to exercise the Licensed Rights, including
+ technical modifications necessary to circumvent Effective
+ Technological Measures. For purposes of this Public License,
+ simply making modifications authorized by this Section 2(a)
+ (4) never produces Adapted Material.
+
+ 5. Downstream recipients.
+
+ a. Offer from the Licensor -- Licensed Material. Every
+ recipient of the Licensed Material automatically
+ receives an offer from the Licensor to exercise the
+ Licensed Rights under the terms and conditions of this
+ Public License.
+
+ b. No downstream restrictions. You may not offer or impose
+ any additional or different terms or conditions on, or
+ apply any Effective Technological Measures to, the
+ Licensed Material if doing so restricts exercise of the
+ Licensed Rights by any recipient of the Licensed
+ Material.
+
+ 6. No endorsement. Nothing in this Public License constitutes or
+ may be construed as permission to assert or imply that You
+ are, or that Your use of the Licensed Material is, connected
+ with, or sponsored, endorsed, or granted official status by,
+ the Licensor or others designated to receive attribution as
+ provided in Section 3(a)(1)(A)(i).
+
+ b. Other rights.
+
+ 1. Moral rights, such as the right of integrity, are not
+ licensed under this Public License, nor are publicity,
+ privacy, and/or other similar personality rights; however, to
+ the extent possible, the Licensor waives and/or agrees not to
+ assert any such rights held by the Licensor to the limited
+ extent necessary to allow You to exercise the Licensed
+ Rights, but not otherwise.
+
+ 2. Patent and trademark rights are not licensed under this
+ Public License.
+
+ 3. To the extent possible, the Licensor waives any right to
+ collect royalties from You for the exercise of the Licensed
+ Rights, whether directly or through a collecting society
+ under any voluntary or waivable statutory or compulsory
+ licensing scheme. In all other cases the Licensor expressly
+ reserves any right to collect such royalties, including when
+ the Licensed Material is used other than for NonCommercial
+ purposes.
+
+
+Section 3 -- License Conditions.
+
+Your exercise of the Licensed Rights is expressly made subject to the
+following conditions.
+
+ a. Attribution.
+
+ 1. If You Share the Licensed Material, You must:
+
+ a. retain the following if it is supplied by the Licensor
+ with the Licensed Material:
+
+ i. identification of the creator(s) of the Licensed
+ Material and any others designated to receive
+ attribution, in any reasonable manner requested by
+ the Licensor (including by pseudonym if
+ designated);
+
+ ii. a copyright notice;
+
+ iii. a notice that refers to this Public License;
+
+ iv. a notice that refers to the disclaimer of
+ warranties;
+
+ v. a URI or hyperlink to the Licensed Material to the
+ extent reasonably practicable;
+
+ b. indicate if You modified the Licensed Material and
+ retain an indication of any previous modifications; and
+
+ c. indicate the Licensed Material is licensed under this
+ Public License, and include the text of, or the URI or
+ hyperlink to, this Public License.
+
+ For the avoidance of doubt, You do not have permission under
+ this Public License to Share Adapted Material.
+
+ 2. You may satisfy the conditions in Section 3(a)(1) in any
+ reasonable manner based on the medium, means, and context in
+ which You Share the Licensed Material. For example, it may be
+ reasonable to satisfy the conditions by providing a URI or
+ hyperlink to a resource that includes the required
+ information.
+
+ 3. If requested by the Licensor, You must remove any of the
+ information required by Section 3(a)(1)(A) to the extent
+ reasonably practicable.
+
+
+Section 4 -- Sui Generis Database Rights.
+
+Where the Licensed Rights include Sui Generis Database Rights that
+apply to Your use of the Licensed Material:
+
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right
+ to extract, reuse, reproduce, and Share all or a substantial
+ portion of the contents of the database for NonCommercial purposes
+ only and provided You do not Share Adapted Material;
+
+ b. if You include all or a substantial portion of the database
+ contents in a database in which You have Sui Generis Database
+ Rights, then the database in which You have Sui Generis Database
+ Rights (but not its individual contents) is Adapted Material; and
+
+ c. You must comply with the conditions in Section 3(a) if You Share
+ all or a substantial portion of the contents of the database.
+
+For the avoidance of doubt, this Section 4 supplements and does not
+replace Your obligations under this Public License where the Licensed
+Rights include other Copyright and Similar Rights.
+
+
+Section 5 -- Disclaimer of Warranties and Limitation of Liability.
+
+ a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
+ EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
+ AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
+ ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
+ IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
+ WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
+ ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
+ KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
+ ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
+
+ b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
+ TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
+ NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
+ INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
+ COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
+ USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
+ ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
+ DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
+ IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
+
+ c. The disclaimer of warranties and limitation of liability provided
+ above shall be interpreted in a manner that, to the extent
+ possible, most closely approximates an absolute disclaimer and
+ waiver of all liability.
+
+
+Section 6 -- Term and Termination.
+
+ a. This Public License applies for the term of the Copyright and
+ Similar Rights licensed here. However, if You fail to comply with
+ this Public License, then Your rights under this Public License
+ terminate automatically.
+
+ b. Where Your right to use the Licensed Material has terminated under
+ Section 6(a), it reinstates:
+
+ 1. automatically as of the date the violation is cured, provided
+ it is cured within 30 days of Your discovery of the
+ violation; or
+
+ 2. upon express reinstatement by the Licensor.
+
+ For the avoidance of doubt, this Section 6(b) does not affect any
+ right the Licensor may have to seek remedies for Your violations
+ of this Public License.
+
+ c. For the avoidance of doubt, the Licensor may also offer the
+ Licensed Material under separate terms or conditions or stop
+ distributing the Licensed Material at any time; however, doing so
+ will not terminate this Public License.
+
+ d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
+ License.
+
+
+Section 7 -- Other Terms and Conditions.
+
+ a. The Licensor shall not be bound by any additional or different
+ terms or conditions communicated by You unless expressly agreed.
+
+ b. Any arrangements, understandings, or agreements regarding the
+ Licensed Material not stated herein are separate from and
+ independent of the terms and conditions of this Public License.
+
+
+Section 8 -- Interpretation.
+
+ a. For the avoidance of doubt, this Public License does not, and
+ shall not be interpreted to, reduce, limit, restrict, or impose
+ conditions on any use of the Licensed Material that could lawfully
+ be made without permission under this Public License.
+
+ b. To the extent possible, if any provision of this Public License is
+ deemed unenforceable, it shall be automatically reformed to the
+ minimum extent necessary to make it enforceable. If the provision
+ cannot be reformed, it shall be severed from this Public License
+ without affecting the enforceability of the remaining terms and
+ conditions.
+
+ c. No term or condition of this Public License will be waived and no
+ failure to comply consented to unless expressly agreed to by the
+ Licensor.
+
+ d. Nothing in this Public License constitutes or may be interpreted
+ as a limitation upon, or waiver of, any privileges and immunities
+ that apply to the Licensor or You, including from the legal
+ processes of any jurisdiction or authority.
+
+=======================================================================
+
+Creative Commons is not a party to its public
+licenses. Notwithstanding, Creative Commons may elect to apply one of
+its public licenses to material it publishes and in those instances
+will be considered the "Licensor." The text of the Creative Commons
+public licenses is dedicated to the public domain under the CC0 Public
+Domain Dedication. Except for the limited purpose of indicating that
+material is shared under a Creative Commons public license or as
+otherwise permitted by the Creative Commons policies published at
+creativecommons.org/policies, Creative Commons does not authorize the
+use of the trademark "Creative Commons" or any other trademark or logo
+of Creative Commons without its prior written consent including,
+without limitation, in connection with any unauthorized modifications
+to any of its public licenses or any other arrangements,
+understandings, or agreements concerning use of licensed material. For
+the avoidance of doubt, this paragraph does not form part of the
+public licenses.
+
+Creative Commons may be contacted at creativecommons.org.
\ No newline at end of file
diff --git a/WiLoR/mano_data/mano_mean_params.npz b/WiLoR/mano_data/mano_mean_params.npz
new file mode 100644
index 0000000000000000000000000000000000000000..dc294b01fb78a9cd6636c87a69b59cf82d28d15b
--- /dev/null
+++ b/WiLoR/mano_data/mano_mean_params.npz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efc0ec58e4a5cef78f3abfb4e8f91623b8950be9eff8b8e0dbb0d036ebc63988
+size 1178
diff --git a/WiLoR/pretrained_models/dataset_config.yaml b/WiLoR/pretrained_models/dataset_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7432c9c24e1876075fed3f8d1fe37e8e98201d80
--- /dev/null
+++ b/WiLoR/pretrained_models/dataset_config.yaml
@@ -0,0 +1,58 @@
+ARCTIC-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/arctic-train/{000000..000176}.tar
+ epoch_size: 177000
+BEDLAM-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/bedlam-train/{000000..000300}.tar
+ epoch_size: 301000
+COCOW-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/cocow-train/{000000..000036}.tar
+ epoch_size: 78666
+DEX-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/dex-train/{000000..000406}.tar
+ epoch_size: 406888
+FREIHAND-MOCAP:
+ DATASET_FILE: wilor_training_data/freihand_mocap.npz
+FREIHAND-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/freihand-train/{000000..000130}.tar
+ epoch_size: 130240
+H2O3D-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/h2o3d-train/{000000..000060}.tar
+ epoch_size: 121996
+HALPE-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/halpe-train/{000000..000022}.tar
+ epoch_size: 34289
+HO3D-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/ho3d-train/{000000..000083}.tar
+ epoch_size: 83325
+HOT3D-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/hot3d-train/{000000..000571}.tar
+ epoch_size: 572000
+INTERHAND26M-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/interhand26m-train/{000000..001056}.tar
+ epoch_size: 1424632
+MPIINZSL-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/mpiinzsl-train/{000000..000015}.tar
+ epoch_size: 15184
+MTC-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/mtc-train/{000000..000306}.tar
+ epoch_size: 363947
+REINTER-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/reinter-train/{000000..000418}.tar
+ epoch_size: 419000
+RHD-TRAIN:
+ TYPE: ImageDataset
+ URLS: wilor_training_data/dataset_tars/rhd-train/{000000..000041}.tar
+ epoch_size: 61705
diff --git a/WiLoR/pretrained_models/model_config.yaml b/WiLoR/pretrained_models/model_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0156431ff66c25db0a83aff341d276be4f8b799b
--- /dev/null
+++ b/WiLoR/pretrained_models/model_config.yaml
@@ -0,0 +1,119 @@
+task_name: train
+tags:
+- dev
+train: true
+test: false
+ckpt_path: null
+seed: null
+DATASETS:
+ TRAIN:
+ FREIHAND-TRAIN:
+ WEIGHT: 0.2
+ INTERHAND26M-TRAIN:
+ WEIGHT: 0.1
+ MTC-TRAIN:
+ WEIGHT: 0.05
+ RHD-TRAIN:
+ WEIGHT: 0.05
+ COCOW-TRAIN:
+ WEIGHT: 0.05
+ HALPE-TRAIN:
+ WEIGHT: 0.05
+ MPIINZSL-TRAIN:
+ WEIGHT: 0.05
+ HO3D-TRAIN:
+ WEIGHT: 0.05
+ H2O3D-TRAIN:
+ WEIGHT: 0.05
+ DEX-TRAIN:
+ WEIGHT: 0.05
+ BEDLAM-TRAIN:
+ WEIGHT: 0.05
+ REINTER-TRAIN:
+ WEIGHT: 0.1
+ HOT3D-TRAIN:
+ WEIGHT: 0.05
+ ARCTIC-TRAIN:
+ WEIGHT: 0.1
+ VAL:
+ FREIHAND-TRAIN:
+ WEIGHT: 1.0
+ MOCAP: FREIHAND-MOCAP
+ BETAS_REG: true
+ CONFIG:
+ SCALE_FACTOR: 0.3
+ ROT_FACTOR: 30
+ TRANS_FACTOR: 0.02
+ COLOR_SCALE: 0.2
+ ROT_AUG_RATE: 0.6
+ TRANS_AUG_RATE: 0.5
+ DO_FLIP: false
+ FLIP_AUG_RATE: 0.0
+ EXTREME_CROP_AUG_RATE: 0.0
+ EXTREME_CROP_AUG_LEVEL: 1
+extras:
+ ignore_warnings: false
+ enforce_tags: true
+ print_config: true
+exp_name: WiLoR
+MANO:
+ DATA_DIR: mano_data
+ MODEL_PATH: ${MANO.DATA_DIR}
+ GENDER: neutral
+ NUM_HAND_JOINTS: 15
+ MEAN_PARAMS: ${MANO.DATA_DIR}/mano_mean_params.npz
+ CREATE_BODY_POSE: false
+EXTRA:
+ FOCAL_LENGTH: 5000
+ NUM_LOG_IMAGES: 4
+ NUM_LOG_SAMPLES_PER_IMAGE: 8
+ PELVIS_IND: 0
+GENERAL:
+ TOTAL_STEPS: 1000000
+ LOG_STEPS: 1000
+ VAL_STEPS: 1000
+ CHECKPOINT_STEPS: 1000
+ CHECKPOINT_SAVE_TOP_K: 1
+ NUM_WORKERS: 8
+ PREFETCH_FACTOR: 2
+TRAIN:
+ LR: 1.0e-05
+ WEIGHT_DECAY: 0.0001
+ BATCH_SIZE: 32
+ LOSS_REDUCTION: mean
+ NUM_TRAIN_SAMPLES: 2
+ NUM_TEST_SAMPLES: 64
+ POSE_2D_NOISE_RATIO: 0.01
+ SMPL_PARAM_NOISE_RATIO: 0.005
+MODEL:
+ IMAGE_SIZE: 256
+ IMAGE_MEAN:
+ - 0.485
+ - 0.456
+ - 0.406
+ IMAGE_STD:
+ - 0.229
+ - 0.224
+ - 0.225
+ BACKBONE:
+ TYPE: vit
+ PRETRAINED_WEIGHTS: training_data/vitpose_backbone.pth
+ MANO_HEAD:
+ TYPE: transformer_decoder
+ IN_CHANNELS: 2048
+ TRANSFORMER_DECODER:
+ depth: 6
+ heads: 8
+ mlp_dim: 1024
+ dim_head: 64
+ dropout: 0.0
+ emb_dropout: 0.0
+ norm: layer
+ context_dim: 1280
+LOSS_WEIGHTS:
+ KEYPOINTS_3D: 0.05
+ KEYPOINTS_2D: 0.01
+ GLOBAL_ORIENT: 0.001
+ HAND_POSE: 0.001
+ BETAS: 0.0005
+ ADVERSARIAL: 0.0005
diff --git a/WiLoR/requirements.txt b/WiLoR/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b273a0b4aef6b3500eb8d469452f385eb2147d82
--- /dev/null
+++ b/WiLoR/requirements.txt
@@ -0,0 +1,20 @@
+numpy
+opencv-python
+pyrender
+pytorch-lightning
+scikit-image
+smplx==0.1.28
+yacs
+chumpy @ git+https://github.com/mattloper/chumpy
+timm
+einops
+xtcocotools
+pandas
+hydra-core
+hydra-submitit-launcher
+hydra-colorlog
+pyrootutils
+rich
+webdataset
+gradio
+ultralytics==8.1.34
diff --git a/WiLoR/whim/Dataset_instructions.md b/WiLoR/whim/Dataset_instructions.md
new file mode 100644
index 0000000000000000000000000000000000000000..3bb65a46e08ebefb8e50975d301811a1a048a0a2
--- /dev/null
+++ b/WiLoR/whim/Dataset_instructions.md
@@ -0,0 +1,31 @@
+## WHIM Dataset
+
+**Annotations**
+
+The image annotations can be downloaded from the following Drive:
+
+```
+https://drive.google.com/drive/folders/1d9Fw7LfnF5oJuA6yE8T3xA-u9p6H5ObZ
+```
+
+**[Alternative]**: The image annotations can be also downloaded from Hugging Face:
+```
+https://huggingface.co/datasets/rolpotamias/WHIM
+```
+If you are using Hugging Face you might need to merge the training zip files into a single file before uncompressing:
+```
+cat train_split.zip* > ~/train_split.zip
+```
+
+**Images**
+
+To download the corresponding images you need to first download the YouTube videos and extract the specific frames.
+You will need to install `pytubefix` or any similar package to download YouTube videos:
+```
+pip install -Iv pytubefix==8.12.2
+```
+You can then run the following command to download the corresponding train/test images:
+```
+python download_videos.py --mode {train/test}
+```
+Please make sure that all data files are downloaded into the same directory.
diff --git a/WiLoR/whim/test_video_ids.json b/WiLoR/whim/test_video_ids.json
new file mode 100644
index 0000000000000000000000000000000000000000..b876df97db7ad2dc2773c424af7c0e7cb8e8d126
--- /dev/null
+++ b/WiLoR/whim/test_video_ids.json
@@ -0,0 +1 @@
+{"YynYZyoETto": {"res": [360, 480], "length": 4678, "fps": 29.97002997002997}, "_iirwC_DvJ0": {"res": [480, 854], "length": 7994, "fps": 29.97002997002997}, "ZMnb9TTsx98": {"res": [1080, 1920], "length": 8109, "fps": 29.97002997002997}, "IrSIHJ0-AaU": {"res": [360, 640], "length": 880, "fps": 30.0}, "w2ULyzWkZ3k": {"res": [1080, 1920], "length": 17032, "fps": 29.97002997002997}, "ivyqQreoVQA": {"res": [1080, 1440], "length": 9610, "fps": 29.97002997002997}, "R07f8kg1h8o": {"res": [1080, 1920], "length": 10726, "fps": 23.976023976023978}, "7S9q1kAVmc0": {"res": [720, 1280], "length": 53757, "fps": 25.0}, "_Ce7G35GIqA": {"res": [720, 1280], "length": 1620, "fps": 30.0}, "lhHkJ3InQOE": {"res": [240, 320], "length": 1600, "fps": 11.988011988011989}, "NXRHcCScubA": {"res": [1080, 1920], "length": 9785, "fps": 29.97002997002997}, "DjFX4idkS3o": {"res": [720, 1280], "length": 5046, "fps": 29.97002997002997}, "06kKvQp4SfM": {"res": [720, 1280], "length": 2661, "fps": 30.0}, "8NqJiAu9W3Y": {"res": [720, 1280], "length": 4738, "fps": 29.97002997002997}, "nN5Y--biYv4": {"res": [720, 1280], "length": 38380, "fps": 29.97}, "OiAlJIaWOBg": {"res": [720, 1280], "length": 10944, "fps": 30.0}, "nJa_omJBzoU": {"res": [720, 1280], "length": 4311, "fps": 29.97002997002997}, "ff_xcsFJ8Pw": {"res": [720, 1280], "length": 5631, "fps": 29.97}, "Y1mNu5iFwMg": {"res": [720, 1280], "length": 7060, "fps": 30.0}, "Ipe9xJCfuTM": {"res": [1080, 1920], "length": 52419, "fps": 29.97002997002997}, "vRkcw9SRems": {"res": [1080, 1920], "length": 10282, "fps": 23.976023976023978}, "ChIJjJyBjQ0": {"res": [1080, 1920], "length": 20228, "fps": 29.97002997002997}, "bxZtXdVvfpc": {"res": [1080, 1920], "length": 2369, "fps": 23.976023976023978}, "MPeXy2U4yJM": {"res": [1080, 1920], "length": 6760, "fps": 24.0}, "wnKnoui3THA": {"res": [1080, 1920], "length": 7934, "fps": 25.0}, "gnArvcWaH6I": {"res": [480, 720], "length": 6864, "fps": 29.97002997002997}}
\ No newline at end of file
diff --git a/WiLoR/whim/train_video_ids.json b/WiLoR/whim/train_video_ids.json
new file mode 100644
index 0000000000000000000000000000000000000000..c38d1c34cd91b7c71274f269a49f76dafee82ffb
--- /dev/null
+++ b/WiLoR/whim/train_video_ids.json
@@ -0,0 +1 @@
+{"5-TyAsFwo40": {"res": [720, 960], "length": 3185, "fps": 29.97002997002997}, "6CBTaZ93X-I": {"res": [1080, 1920], "length": 4718, "fps": 29.97002997002997}, "0tQRCmF1lhE": {"res": [1080, 1920], "length": 5935, "fps": 23.976023976023978}, "7XN5Yj8mPHc": {"res": [1080, 1920], "length": 1010, "fps": 29.97002997002997}, "40TaTmMaqC0": {"res": [720, 1280], "length": 4854, "fps": 30.0}, "0ummhjbzT3w": {"res": [1080, 1920], "length": 9271, "fps": 23.976023976023978}, "2fcX77AHA2I": {"res": [720, 1280], "length": 4376, "fps": 23.976023976023978}, "-ChOGBEL5uE": {"res": [720, 1280], "length": 5759, "fps": 29.97002997002997}, "7ZeIBdPZhC0": {"res": [1080, 1920], "length": 7142, "fps": 23.976023976023978}, "6GLdekH_fqs": {"res": [1080, 1920], "length": 405, "fps": 29.97002997002997}, "5z_Sk7P10Gk": {"res": [480, 854], "length": 1839, "fps": 29.97002997002997}, "5azC2xW0Mc8": {"res": [1080, 1920], "length": 5591, "fps": 23.976023976023978}, "2deG-DmpnAQ": {"res": [1080, 1920], "length": 12947, "fps": 29.97002997002997}, "3WZeZ8czCBo": {"res": [720, 1280], "length": 6094, "fps": 29.97}, "3Byo1PPkZTY": {"res": [1080, 1920], "length": 3343, "fps": 29.97002997002997}, "4jPSJl8BjT4": {"res": [1080, 1920], "length": 3534, "fps": 29.97002997002997}, "3Tb6v6hbF0I": {"res": [720, 1280], "length": 6375, "fps": 29.97}, "1qWGfoeAt5U": {"res": [720, 720], "length": 934, "fps": 29.97002997002997}, "6n8PyIYzS-w": {"res": [1080, 1920], "length": 4011, "fps": 23.976023976023978}, "10XtDlKwdRo": {"res": [1080, 1920], "length": 1718, "fps": 29.97002997002997}, "59gGMhffroM": {"res": [720, 1280], "length": 3638, "fps": 30.0}, "6nBY2l4Q7DA": {"res": [720, 1280], "length": 6319, "fps": 29.97002997002997}, "2SEgXpck6-o": {"res": [720, 1280], "length": 3854, "fps": 29.97002997002997}, "5uyaCVlSQIY": {"res": [1080, 1920], "length": 13698, "fps": 30.0}, "62X7X-W4lcA": {"res": [720, 1080], "length": 578, "fps": 17.0}, "3wDlZYzsVzY": {"res": [1080, 1920], "length": 4451, "fps": 29.97002997002997}, 
"3i7FjHxe8DE": {"res": [1080, 1920], "length": 5523, "fps": 29.97002997002997}, "6lBrXPTzTBM": {"res": [1080, 1920], "length": 18993, "fps": 30.0}, "0mR7n0W1h9c": {"res": [1080, 1920], "length": 3907, "fps": 29.97002997002997}, "0INWLJXjBsk": {"res": [1080, 1920], "length": 7347, "fps": 29.97002997002997}, "4RgkyXwZ8Y8": {"res": [540, 1280], "length": 2869, "fps": 23.976023976023978}, "4SO84YB9bnU": {"res": [1080, 1920], "length": 9502, "fps": 23.976023976023978}, "2m5WjXKArt4": {"res": [720, 1080], "length": 1473, "fps": 15.0}, "4ckKnEOeT88": {"res": [720, 1280], "length": 2970, "fps": 29.97002997002997}, "-5pTTulJZ38": {"res": [360, 640], "length": 5729, "fps": 23.976023976023978}, "1oztZZLorBI": {"res": [720, 1280], "length": 8654, "fps": 29.97002997002997}, "2ucO7etmU-w": {"res": [720, 1280], "length": 26258, "fps": 30.0}, "7dgQoq2BEJ8": {"res": [1080, 1920], "length": 7380, "fps": 30.0}, "4LVtQ_shm-M": {"res": [720, 1280], "length": 2565, "fps": 29.97002997002997}, "6XlnqBSlSf8": {"res": [720, 1280], "length": 3836, "fps": 29.97}, "3QURUfXj494": {"res": [720, 1280], "length": 4259, "fps": 24.0}, "1OR8qUmcR9g": {"res": [1080, 1920], "length": 376, "fps": 23.976023976023978}, "PPTKG6jSNps": {"res": [720, 1280], "length": 5122, "fps": 30.0}, "1JI0VePnXUg": {"res": [720, 1280], "length": 3269, "fps": 30.0}, "4HjpxzKwpyw": {"res": [1080, 1920], "length": 21914, "fps": 30.0}, "5j0AVhkfYaI": {"res": [1080, 1920], "length": 10701, "fps": 23.976023976023978}, "-OQn9wWc4a4": {"res": [1080, 1920], "length": 2773, "fps": 23.976023976023978}, "2r-hs8cizg4": {"res": [1080, 1920], "length": 5200, "fps": 29.97002997002997}, "3JxIU16AK4E": {"res": [720, 1280], "length": 25299, "fps": 30.0}, "7r4e0AnFfIA": {"res": [720, 1280], "length": 3367, "fps": 29.97}, "1CzOg-MVqs4": {"res": [480, 640], "length": 15395, "fps": 29.97002997002997}, "1HTJc_NY9_U": {"res": [1080, 1920], "length": 2014, "fps": 23.976023976023978}, "1HBEGIOndTE": {"res": [720, 1280], "length": 2484, "fps": 
30.0}, "4BjPdsTYgpQ": {"res": [720, 1280], "length": 6133, "fps": 30.0}, "0u2AVVL5KRE": {"res": [720, 1280], "length": 9725, "fps": 29.97002997002997}, "1zRfH_9kqtk": {"res": [1080, 1920], "length": 9163, "fps": 29.97002997002997}, "6oSVUOYiy-o": {"res": [720, 1280], "length": 5997, "fps": 15.0}, "7aUa50q0GcE": {"res": [720, 1280], "length": 15160, "fps": 29.97}, "3-MyqNsct4M": {"res": [720, 1280], "length": 1871, "fps": 29.97002997002997}, "-ZDR-PsGMUo": {"res": [720, 1280], "length": 11142, "fps": 30.0}, "2LwV2AuvNaU": {"res": [1080, 1920], "length": 9866, "fps": 30.0}, "2-p5HVrDLik": {"res": [720, 1280], "length": 10185, "fps": 29.97002997002997}, "0x09FfojaSM": {"res": [1080, 1920], "length": 2410, "fps": 29.97002997002997}, "299RErdnW94": {"res": [1080, 1920], "length": 15030, "fps": 29.97002997002997}, "2Jk-LnStEvQ": {"res": [720, 1280], "length": 12677, "fps": 29.97002997002997}, "5w2uzL9-elE": {"res": [720, 1280], "length": 5013, "fps": 29.97002997002997}, "6otNBTeQN7M": {"res": [720, 1280], "length": 3702, "fps": 29.97}, "1nG-UNiXjiI": {"res": [1080, 1080], "length": 3833, "fps": 23.976023976023978}, "2CpXjTKQkvI": {"res": [1080, 1920], "length": 5310, "fps": 29.97002997002997}, "1f33MBtnSWk": {"res": [360, 640], "length": 2127, "fps": 29.97}, "4MvYLIoubQc": {"res": [1080, 1920], "length": 8642, "fps": 23.976023976023978}, "74cZyFxTBUA": {"res": [1080, 1920], "length": 2245, "fps": 29.786}, "2SEp74TgXeM": {"res": [720, 1280], "length": 19211, "fps": 30.0}, "3bQ6-0PfEN4": {"res": [1080, 1920], "length": 6847, "fps": 29.97002997002997}, "4ydvtS8bXco": {"res": [1080, 1920], "length": 2615, "fps": 29.97002997002997}, "4XrpeZB5zZQ": {"res": [720, 1280], "length": 10285, "fps": 29.97002997002997}, "59vxdDvvDAM": {"res": [360, 640], "length": 14399, "fps": 29.97002997002997}, "7X-yER4jLkw": {"res": [1080, 1920], "length": 8569, "fps": 29.97002997002997}, "7Letpg0QCtY": {"res": [1080, 1920], "length": 9671, "fps": 23.976023976023978}, "2fBh7sc0ti8": {"res": [1080, 
1920], "length": 2176, "fps": 29.0}, "82XwIKFGh6U": {"res": [1080, 1920], "length": 10321, "fps": 23.976023976023978}, "1SXpmomfrjU": {"res": [720, 1280], "length": 29624, "fps": 29.97002997002997}, "4uYZQSjrdf8": {"res": [720, 1280], "length": 3488, "fps": 24.0}, "3Q1ltxsVHyw": {"res": [1080, 1920], "length": 11445, "fps": 25.0}, "0JYql1QeUUI": {"res": [1080, 1920], "length": 18705, "fps": 30.0}, "-QHnZBBE8Ho": {"res": [1080, 1920], "length": 17747, "fps": 30.0}, "5y_POcCcSoY": {"res": [1080, 1920], "length": 34510, "fps": 24.0}, "2W8nPQFb3ZY": {"res": [720, 1280], "length": 12863, "fps": 29.743}, "-G2ksp95_Uw": {"res": [360, 638], "length": 9870, "fps": 29.97002997002997}, "1tdocyR8uvk": {"res": [1080, 1920], "length": 2445, "fps": 23.976023976023978}, "87tCbB2Obhc": {"res": [1080, 1920], "length": 76241, "fps": 29.97002997002997}, "5byN0HNkOFQ": {"res": [720, 1280], "length": 906, "fps": 29.847}, "7mXqT8MubgA": {"res": [720, 1280], "length": 9452, "fps": 23.976023976023978}, "6sHCD5Z-PY8": {"res": [720, 1280], "length": 8292, "fps": 30.0}, "itgzjJAnRuU": {"res": [720, 1280], "length": 17076, "fps": 29.97002997002997}, "4DISKhYQK6U": {"res": [1080, 1920], "length": 2010, "fps": 29.97002997002997}, "7RPhNw37nZo": {"res": [720, 1280], "length": 2632, "fps": 24.0}, "29wUp2U9WsM": {"res": [720, 1280], "length": 3926, "fps": 30.0}, "5algvYiLuSc": {"res": [360, 640], "length": 3737, "fps": 29.97002997002997}, "0t82hD3rkCU": {"res": [480, 854], "length": 2505, "fps": 25.0}, "1p1gWBabLOc": {"res": [1080, 1920], "length": 4218, "fps": 29.97002997002997}, "1d7meFB92Z0": {"res": [1080, 1920], "length": 22208, "fps": 29.97002997002997}, "2xqgpo-bhEs": {"res": [720, 1280], "length": 3043, "fps": 30.0}, "57xL-Kn_a6Q": {"res": [720, 720], "length": 1079, "fps": 30.0}, "4XggcA30x2k": {"res": [720, 1280], "length": 5434, "fps": 29.97002997002997}, "0EV7Cfdw58s": {"res": [720, 1280], "length": 9561, "fps": 30.0}, "5Jmlftgw9-E": {"res": [720, 1280], "length": 4033, "fps": 30.0}, 
"1uAcqeRAURY": {"res": [720, 1280], "length": 1423, "fps": 30.0}, "7h6_kO0gpXo": {"res": [1080, 1920], "length": 51398, "fps": 30.0}, "2cnYrWfAeYU": {"res": [720, 1280], "length": 24116, "fps": 30.0}, "0axfOnsyUE8": {"res": [1080, 1920], "length": 11590, "fps": 23.976023976023978}, "6LM-dcvG38I": {"res": [720, 1280], "length": 789, "fps": 29.97002997002997}, "5Teum5PgaaE": {"res": [480, 654], "length": 7370, "fps": 29.97002997002997}, "7NITc1vKQz0": {"res": [480, 854], "length": 4827, "fps": 29.97002997002997}, "7_ihpter5zg": {"res": [1080, 1920], "length": 1532, "fps": 30.0}, "45vsVWWiJ4Q": {"res": [720, 1280], "length": 3991, "fps": 15.0}, "6xBY4pbcInw": {"res": [1080, 1920], "length": 2783, "fps": 29.97002997002997}, "0vzECnJwdOY": {"res": [1080, 1920], "length": 13201, "fps": 23.976023976023978}, "0VQ9H3AxdGc": {"res": [1080, 1920], "length": 29767, "fps": 29.97002997002997}, "5ikPiwUfsJM": {"res": [360, 640], "length": 2159, "fps": 25.0}, "66_C4RzxVd0": {"res": [1080, 1920], "length": 8413, "fps": 23.976023976023978}, "2_PWeBb41qM": {"res": [720, 1280], "length": 27169, "fps": 30.0}, "0s6E7uGXjRc": {"res": [1080, 1920], "length": 5129, "fps": 30.0}, "2UiS6Sre3XA": {"res": [1080, 1920], "length": 2314, "fps": 30.0}, "0gLuBychKgQ": {"res": [360, 640], "length": 4855, "fps": 29.97002997002997}, "3u5q5MZv0mI": {"res": [1080, 1920], "length": 3610, "fps": 29.94}, "5BM9FSYEeo0": {"res": [720, 1280], "length": 9019, "fps": 29.97}, "3mfHu_ianaQ": {"res": [1080, 1920], "length": 16613, "fps": 29.97002997002997}, "7bhZ6-iYHM0": {"res": [720, 1280], "length": 30610, "fps": 15.0}, "4tzik1GByBM": {"res": [720, 1280], "length": 6404, "fps": 30.0}, "1y8vXjLQWL0": {"res": [1080, 1920], "length": 7194, "fps": 29.97002997002997}, "4SLHWjGmHR4": {"res": [1080, 1920], "length": 17325, "fps": 29.97002997002997}, "23kppT5hKWU": {"res": [480, 854], "length": 7257, "fps": 29.97002997002997}, "4ZWZaCaywiE": {"res": [1080, 1920], "length": 17367, "fps": 29.97002997002997}, 
"0FBP71iD9u4": {"res": [480, 720], "length": 32302, "fps": 24.0}, "0gzQAgjx39o": {"res": [480, 640], "length": 992, "fps": 25.0}, "5CQ9N0ls0Hc": {"res": [1080, 1920], "length": 1002, "fps": 29.97002997002997}, "3TEQLFpSk2I": {"res": [1080, 1920], "length": 16979, "fps": 30.0}, "2ofPs_uTel4": {"res": [1080, 1920], "length": 15681, "fps": 23.976023976023978}, "22V4MQ8dc2Q": {"res": [480, 640], "length": 14589, "fps": 29.97002997002997}, "3USiBRH9f9w": {"res": [360, 640], "length": 3460, "fps": 29.97002997002997}, "7dWf7aNpFto": {"res": [1080, 1920], "length": 8384, "fps": 23.976023976023978}, "2WvGsW3j3tI": {"res": [1080, 1920], "length": 1789, "fps": 29.97002997002997}, "1lqXl-xn0e4": {"res": [480, 640], "length": 10739, "fps": 29.97002997002997}, "0GgsslCMqxk": {"res": [720, 1280], "length": 3295, "fps": 30.0}, "vYQ3a-RLd4c": {"res": [720, 1280], "length": 15743, "fps": 30.0}, "54q7y2npxbM": {"res": [1080, 1920], "length": 18341, "fps": 30.0}, "7W5O55LihF4": {"res": [720, 1280], "length": 958, "fps": 30.0}, "42G9c1uWjDE": {"res": [1080, 1920], "length": 19295, "fps": 29.97002997002997}, "0WcGVcWLLy0": {"res": [1080, 1920], "length": 15518, "fps": 29.97002997002997}, "1OJS1t-6_9k": {"res": [720, 1280], "length": 12875, "fps": 29.97002997002997}, "7Z4xEaJFcNc": {"res": [720, 1280], "length": 13785, "fps": 29.97002997002997}, "6DLCoC1MWi0": {"res": [720, 1280], "length": 24430, "fps": 30.0}, "7y_OtgaSdx4": {"res": [1080, 1920], "length": 12919, "fps": 29.97002997002997}, "56n3PZ4Syrs": {"res": [720, 1280], "length": 21077, "fps": 29.97002997002997}, "0ISFR5_G2Nk": {"res": [1080, 1920], "length": 3600, "fps": 29.97002997002997}, "5F8t4Bd-aWM": {"res": [1080, 1920], "length": 1786, "fps": 30.0}, "45T8o3_RJAU": {"res": [454, 854], "length": 783, "fps": 29.929}, "3mXKwT5Cgsk": {"res": [1080, 1920], "length": 1025, "fps": 29.97002997002997}, "86OJd-nTUpE": {"res": [1080, 1920], "length": 5736, "fps": 30.0}, "5C6g05c5v4Q": {"res": [576, 1280], "length": 68731, "fps": 25.0}, 
"5M90r1nf5M8": {"res": [480, 640], "length": 7322, "fps": 30.0}, "4st-CDVOtOU": {"res": [1080, 1920], "length": 4856, "fps": 30.0}, "3HjHyYdKeJ8": {"res": [1080, 1920], "length": 1693, "fps": 29.97002997002997}, "50EV9mbGVWY": {"res": [360, 640], "length": 721, "fps": 23.976023976023978}, "4r465_ijzhA": {"res": [720, 1280], "length": 11614, "fps": 27.0}, "3D75qSOTAvk": {"res": [1080, 1920], "length": 1653, "fps": 29.97002997002997}, "2QSlChnnFmU": {"res": [1080, 1920], "length": 4428, "fps": 29.97002997002997}, "4qXcwFHl100": {"res": [720, 1280], "length": 28414, "fps": 30.0}, "-LoU4OvDdgs": {"res": [1080, 1920], "length": 22747, "fps": 29.97002997002997}, "0m3glzoQMfI": {"res": [1080, 1920], "length": 4768, "fps": 29.97002997002997}, "5jBvbdAiwP4": {"res": [1080, 1920], "length": 8656, "fps": 30.0}, "-acP9jXuPNA": {"res": [720, 1280], "length": 10124, "fps": 30.0}, "299fkv24i_M": {"res": [1080, 1920], "length": 10197, "fps": 23.976023976023978}, "-05MRmp1i4w": {"res": [1080, 1920], "length": 938, "fps": 15.0}, "1YYaRxqSxNc": {"res": [720, 1280], "length": 4386, "fps": 29.97002997002997}, "6w1-4eAUHmk": {"res": [360, 640], "length": 2460, "fps": 30.0}, "-IgqjDsP-R8": {"res": [720, 1280], "length": 4687, "fps": 30.0}, "5Ua91WrHrLU": {"res": [480, 854], "length": 1027, "fps": 23.976023976023978}, "-arU7prTkZY": {"res": [1080, 1920], "length": 1681, "fps": 29.97002997002997}, "52Agg5gy2N4": {"res": [360, 640], "length": 2131, "fps": 30.0}, "5NHgcT-kGqo": {"res": [720, 1280], "length": 11398, "fps": 29.97002997002997}, "2KIyVF36spo": {"res": [1080, 1920], "length": 3514, "fps": 30.0}, "6DuQPRnG0Ww": {"res": [1080, 1920], "length": 31005, "fps": 24.0}, "2U8Ih4IiYSs": {"res": [1080, 608], "length": 2702, "fps": 30.0}, "4bzK9QxsTZ0": {"res": [720, 1280], "length": 3994, "fps": 29.97}, "65KUxTxQbU8": {"res": [360, 584], "length": 13968, "fps": 29.97002997002997}, "0ulVg4h6W9E": {"res": [1080, 1920], "length": 14339, "fps": 29.97002997002997}, "45fga7XQh_w": {"res": [360, 
640], "length": 10816, "fps": 29.97}, "1Ov5Kuhijeo": {"res": [1080, 1920], "length": 33300, "fps": 29.97002997002997}, "7SkTWPLJVFg": {"res": [1080, 1920], "length": 2986, "fps": 29.97002997002997}, "3fWmeoRWkro": {"res": [720, 1280], "length": 3544, "fps": 29.337}, "1GbeP6UsNdo": {"res": [480, 640], "length": 1847, "fps": 30.0}, "0pHAEC2ydR0": {"res": [480, 640], "length": 1418, "fps": 18.228}, "6R1cM5x04fc": {"res": [474, 854], "length": 4375, "fps": 15.0}, "4YVVERSg6pc": {"res": [1080, 1920], "length": 7909, "fps": 23.976023976023978}, "78CLW1ydkyU": {"res": [360, 480], "length": 15249, "fps": 30.0}, "6WRkAQJKsf0": {"res": [720, 1280], "length": 2536, "fps": 24.0}, "5Xbze7jMcM0": {"res": [1080, 1920], "length": 3023, "fps": 29.97002997002997}, "3bFbHvIHOZI": {"res": [1080, 1920], "length": 4328, "fps": 29.97002997002997}, "-K-HfBcGPXU": {"res": [720, 1280], "length": 24848, "fps": 30.0}, "3gPumCu8Whk": {"res": [1080, 1080], "length": 2317, "fps": 29.97002997002997}, "7WvxhNo523A": {"res": [360, 640], "length": 12822, "fps": 29.97002997002997}, "0k2iDuG8DnY": {"res": [1080, 1920], "length": 19856, "fps": 29.97002997002997}, "7z3uF03sdtw": {"res": [1080, 1920], "length": 677, "fps": 29.97002997002997}, "-ANTW_UxQqc": {"res": [1080, 608], "length": 743, "fps": 29.97002997002997}, "-0TymhADTXU": {"res": [1080, 1920], "length": 11543, "fps": 29.97002997002997}, "3fo73EJ3XX8": {"res": [720, 1280], "length": 5718, "fps": 30.0}, "5XDn22CL-ig": {"res": [1080, 1920], "length": 12737, "fps": 29.97002997002997}, "1Q2nPVc2-Nk": {"res": [1080, 1920], "length": 3786, "fps": 29.97002997002997}, "1YjsTc70vK4": {"res": [1080, 1920], "length": 10863, "fps": 29.97002997002997}, "1ROgPtwNQJA": {"res": [1080, 1920], "length": 10444, "fps": 23.976023976023978}, "4c4JlVlsyIs": {"res": [720, 1280], "length": 5699, "fps": 25.0}, "5jyWHxCKwS0": {"res": [480, 654], "length": 4458, "fps": 29.97002997002997}, "35lxIaGGM5Q": {"res": [480, 854], "length": 1285, "fps": 23.976023976023978}, 
"7272RXfPqYg": {"res": [480, 854], "length": 8519, "fps": 29.97002997002997}, "4wy0XftG3nQ": {"res": [1080, 1920], "length": 14440, "fps": 29.97002997002997}, "4ktroOVK0Z8": {"res": [720, 1280], "length": 3012, "fps": 29.97002997002997}, "4MUq3Prl8pU": {"res": [1080, 1920], "length": 14418, "fps": 29.97002997002997}, "3dFFzsCxZDg": {"res": [480, 854], "length": 9787, "fps": 29.97002997002997}, "43eBA67htJc": {"res": [720, 406], "length": 8317, "fps": 29.97002997002997}, "2hcLIu4HCcY": {"res": [720, 1280], "length": 3523, "fps": 29.97002997002997}, "34DMNsPor7I": {"res": [1080, 1920], "length": 314, "fps": 29.97002997002997}, "66iGd5YlT0Q": {"res": [360, 640], "length": 9162, "fps": 30.0}, "5khp3y3cbtA": {"res": [1080, 1920], "length": 12546, "fps": 29.97002997002997}, "4darflX3K9s": {"res": [720, 1280], "length": 13519, "fps": 29.97002997002997}, "0L0MNEN60iM": {"res": [1080, 1920], "length": 22995, "fps": 30.0}, "3zRUoFOUi4Q": {"res": [1080, 1920], "length": 9650, "fps": 29.97002997002997}, "-2UZsKfaGIk": {"res": [720, 1280], "length": 11130, "fps": 29.97002997002997}, "6I1IhiMISZ0": {"res": [720, 1280], "length": 7179, "fps": 29.97002997002997}, "41FOWmQsKPI": {"res": [1080, 1920], "length": 2838, "fps": 29.97002997002997}, "3HBCwm6mPog": {"res": [720, 1280], "length": 1299, "fps": 30.0}, "6TDs5FKNTmM": {"res": [1080, 1920], "length": 9374, "fps": 29.97002997002997}, "6QLiJylSG4M": {"res": [1080, 1920], "length": 4883, "fps": 29.97002997002997}, "2f7m4cB-0JU": {"res": [720, 1280], "length": 1607, "fps": 15.012}, "5nUwyw3pxOo": {"res": [480, 654], "length": 7020, "fps": 29.97002997002997}, "4v_6Fsh6k10": {"res": [1080, 1920], "length": 1650, "fps": 24.0}, "45bVtWZdg4A": {"res": [720, 1280], "length": 1431, "fps": 29.97}, "2sRkc0SllW8": {"res": [1080, 1920], "length": 17097, "fps": 29.97002997002997}, "5cjkZmWlhKE": {"res": [1080, 1920], "length": 14844, "fps": 29.97002997002997}, "2Iax_vC1aMk": {"res": [720, 1280], "length": 1186, "fps": 30.0}, "0F1CZq3opMw": 
{"res": [1080, 1920], "length": 3529, "fps": 30.0}, "5o_LSjNkOQE": {"res": [720, 1280], "length": 1960, "fps": 29.97002997002997}, "1S0otQxKUcs": {"res": [720, 1080], "length": 4589, "fps": 29.426}, "3FRP6ofnay0": {"res": [720, 1280], "length": 1064, "fps": 15.072}, "3_o_AkadIsw": {"res": [480, 640], "length": 5107, "fps": 29.97002997002997}, "4joAmB3hKVs": {"res": [1080, 1920], "length": 15376, "fps": 23.976023976023978}, "1pk1EHZSGwQ": {"res": [360, 640], "length": 11390, "fps": 29.97}, "0NE-fxfnfsw": {"res": [1080, 1920], "length": 7506, "fps": 30.0}, "7CZnCOn0ozo": {"res": [720, 1280], "length": 6062, "fps": 29.97002997002997}, "3hEV4jANTO4": {"res": [1080, 1920], "length": 8682, "fps": 30.0}, "4kUTSXWZFyc": {"res": [720, 1280], "length": 1252, "fps": 16.677}, "0TOvEO_WPZE": {"res": [710, 1280], "length": 19772, "fps": 30.0}, "3ajo_dtCfGg": {"res": [480, 640], "length": 3941, "fps": 29.97002997002997}, "5-XiLc5yNM4": {"res": [720, 1280], "length": 22349, "fps": 29.97002997002997}, "7T1umcntZTA": {"res": [720, 1280], "length": 8689, "fps": 29.97002997002997}, "0k6USIJasZY": {"res": [720, 1280], "length": 340, "fps": 29.97002997002997}, "69g62O-KdNg": {"res": [720, 1280], "length": 8868, "fps": 29.97002997002997}, "6W36OJpocGY": {"res": [720, 1280], "length": 9327, "fps": 30.0}, "2iwlB8-AZFg": {"res": [480, 640], "length": 283, "fps": 29.97002997002997}, "-4FUS54WneA": {"res": [720, 1280], "length": 5075, "fps": 29.97002997002997}, "0qr8Y9u7lK8": {"res": [1080, 1920], "length": 16935, "fps": 30.0}, "1M40DOru15Y": {"res": [480, 640], "length": 2758, "fps": 23.976023976023978}, "182RxC-rQZw": {"res": [720, 1280], "length": 3186, "fps": 30.0}, "318OuzwpCzc": {"res": [1080, 1920], "length": 5675, "fps": 30.0}, "7zzqApG_sEI": {"res": [720, 1280], "length": 7960, "fps": 29.97002997002997}, "7NkRULfZkiY": {"res": [360, 640], "length": 5846, "fps": 29.97002997002997}, "7pnETbZmvjk": {"res": [720, 1280], "length": 1257, "fps": 16.466}, "3KpGlsjPvi0": {"res": [720, 1280], 
"length": 846, "fps": 30.0}, "1lc8__0n4kQ": {"res": [1080, 1920], "length": 10663, "fps": 24.0}, "2hanDNjY1zo": {"res": [1080, 1920], "length": 764, "fps": 29.97002997002997}, "7IuNV-gcRLs": {"res": [720, 1280], "length": 922, "fps": 30.0}, "7n5kOiPqqu4": {"res": [480, 640], "length": 2997, "fps": 29.97002997002997}, "1nJt4VHPNE0": {"res": [720, 1280], "length": 2027, "fps": 30.0}, "7hx4kWJ2zDg": {"res": [720, 1280], "length": 1760, "fps": 29.97002997002997}, "7vT58eLvyfM": {"res": [360, 624], "length": 5237, "fps": 29.97002997002997}, "3TU7xovzhMo": {"res": [720, 1280], "length": 5169, "fps": 29.97002997002997}, "6FsVL71Vv2s": {"res": [360, 480], "length": 13195, "fps": 30.0}, "7yOoVC0OriY": {"res": [720, 1280], "length": 22788, "fps": 29.97002997002997}, "4K9zcemtfjQ": {"res": [720, 1280], "length": 10103, "fps": 29.97002997002997}, "-JGpOd2AlVY": {"res": [480, 640], "length": 6166, "fps": 29.97002997002997}, "-GtDaiSJkSQ": {"res": [360, 640], "length": 3301, "fps": 25.0}, "1uz3vYd3o6A": {"res": [1080, 1920], "length": 15625, "fps": 29.97002997002997}, "3s7TnwfKSN8": {"res": [1080, 1920], "length": 1770, "fps": 29.97002997002997}, "6c5924rcou4": {"res": [480, 854], "length": 3265, "fps": 30.0}, "3_TWJCoDIVA": {"res": [720, 1280], "length": 12178, "fps": 29.97}, "4PoLnjucRcw": {"res": [720, 1280], "length": 3320, "fps": 30.0}, "1GhFf3notdw": {"res": [480, 768], "length": 14788, "fps": 29.97002997002997}, "5vvBRQi_X5o": {"res": [1080, 1920], "length": 11586, "fps": 29.97002997002997}, "1l5EGDbwz1U": {"res": [1080, 1920], "length": 2899, "fps": 30.0}, "2EKVWMwZEN0": {"res": [1080, 1920], "length": 19449, "fps": 29.97002997002997}, "6EqXNRLauMs": {"res": [1080, 1920], "length": 18318, "fps": 29.97002997002997}, "6IlzyR80mU4": {"res": [1080, 1920], "length": 33887, "fps": 24.0}, "-aLiyA30EQI": {"res": [480, 640], "length": 969, "fps": 15.0}, "1GdzGKD-VnQ": {"res": [1080, 1920], "length": 6022, "fps": 30.0}, "3hZoT_dq2mQ": {"res": [1080, 1920], "length": 8891, "fps": 
23.976023976023978}, "1fJ1H9FNN6U": {"res": [1080, 1920], "length": 13168, "fps": 23.976023976023978}, "3ny1DoUAOTs": {"res": [1080, 1920], "length": 20820, "fps": 29.97002997002997}, "0qknoxaCwgg": {"res": [720, 1280], "length": 2249, "fps": 29.97002997002997}, "5CShxp46axE": {"res": [480, 848], "length": 5065, "fps": 30.0}, "0J0lK5EHIAY": {"res": [360, 640], "length": 7896, "fps": 29.97}, "73jtoG3Yl9Y": {"res": [1080, 608], "length": 6130, "fps": 29.97002997002997}, "2YdPY7GMPBU": {"res": [718, 1280], "length": 11981, "fps": 29.594}, "66IddloOA8A": {"res": [720, 1280], "length": 522, "fps": 30.0}, "0v60vuyjGqA": {"res": [1080, 1920], "length": 2651, "fps": 30.0}, "5QO6BTt9-T0": {"res": [720, 1280], "length": 4753, "fps": 29.97002997002997}, "4d5o3B6wRP0": {"res": [1080, 1920], "length": 13317, "fps": 30.0}, "1UWY3vlEOd8": {"res": [480, 654], "length": 1996, "fps": 29.97002997002997}, "0W3wXxl1Fwk": {"res": [480, 720], "length": 20917, "fps": 25.0}, "0fqGjsbAkdU": {"res": [1080, 1920], "length": 3923, "fps": 29.97002997002997}, "2zn24j-XfmI": {"res": [360, 202], "length": 8607, "fps": 29.97002997002997}, "4fqpBXE0_Dc": {"res": [1080, 1920], "length": 3430, "fps": 29.97002997002997}, "7YL5SVVXHCo": {"res": [360, 640], "length": 4257, "fps": 29.97002997002997}, "hPmc7R_dAxc": {"res": [720, 1280], "length": 11467, "fps": 24.0}, "7EnQV3iYp8E": {"res": [1080, 1920], "length": 1742, "fps": 29.97002997002997}, "-HkeOGWJWLI": {"res": [480, 720], "length": 4023, "fps": 29.97002997002997}, "0kQEvxqIi4k": {"res": [360, 640], "length": 326, "fps": 29.97002997002997}, "-60AS2h1R2M": {"res": [1080, 1920], "length": 10429, "fps": 29.97002997002997}, "74v26n8OHpE": {"res": [360, 640], "length": 5930, "fps": 25.0}, "2O64ypfQZ3Q": {"res": [1080, 1920], "length": 5503, "fps": 30.0}, "3qMLOtMJW_c": {"res": [480, 654], "length": 5019, "fps": 29.97002997002997}, "6jjx_r_sgj0": {"res": [720, 1280], "length": 8155, "fps": 29.97}, "1RR1uwVezgc": {"res": [720, 1080], "length": 953, "fps": 
16.983016983016984}, "3BrgsrwOSHk": {"res": [1080, 1920], "length": 15166, "fps": 29.97002997002997}, "82bNCO3cHWM": {"res": [480, 640], "length": 2776, "fps": 29.97002997002997}, "0FFdnI-tQyw": {"res": [480, 720], "length": 13909, "fps": 29.97002997002997}, "5JVKNSm9tPk": {"res": [360, 640], "length": 2834, "fps": 29.97002997002997}, "336iG0cnp1I": {"res": [1080, 1920], "length": 14745, "fps": 29.97002997002997}, "4C_SRuupQJI": {"res": [1080, 1920], "length": 18730, "fps": 25.0}, "1GO8SLX7tLs": {"res": [1080, 1920], "length": 2074, "fps": 23.976023976023978}, "3_LiLpewX64": {"res": [1080, 1920], "length": 926, "fps": 29.97002997002997}, "5_gLhl1LoXc": {"res": [720, 1280], "length": 1753, "fps": 25.0}, "5oBbQdI48js": {"res": [1080, 1920], "length": 3462, "fps": 29.97002997002997}, "63KIq9VyXo8": {"res": [1080, 1920], "length": 9435, "fps": 23.976023976023978}, "4cBkTN7-JTU": {"res": [1080, 1920], "length": 10103, "fps": 23.976023976023978}, "4DBavnAzNpM": {"res": [1080, 608], "length": 4260, "fps": 24.016}, "-7L2DL9IOEU": {"res": [1080, 1920], "length": 4368, "fps": 29.97002997002997}, "4AsiL4C8l_E": {"res": [720, 1280], "length": 16707, "fps": 29.97002997002997}, "64Ua7hXQKYw": {"res": [1080, 1920], "length": 12708, "fps": 29.97002997002997}, "6hvzYJ8Hrh8": {"res": [1080, 1920], "length": 4642, "fps": 23.976023976023978}, "4gc6fwXwFV0": {"res": [720, 1280], "length": 3748, "fps": 29.97002997002997}, "4u07zqg_jRY": {"res": [1080, 1920], "length": 9440, "fps": 30.0}, "2Wbk-bP2EwM": {"res": [1080, 608], "length": 3392, "fps": 30.0}, "5at8X56tN58": {"res": [1080, 1920], "length": 27341, "fps": 23.976023976023978}, "0sLIDjSbdxA": {"res": [1080, 1920], "length": 658, "fps": 29.97002997002997}, "-IrAGNgJDzg": {"res": [480, 640], "length": 1844, "fps": 30.0}, "7ZlEYZYIEwk": {"res": [360, 480], "length": 3106, "fps": 30.0}, "2TjAKLUBJ7s": {"res": [1080, 1920], "length": 9875, "fps": 23.976023976023978}, "5CaUExhJXnk": {"res": [1080, 1920], "length": 2925, "fps": 
29.97002997002997}, "4BFTOWMgAFM": {"res": [720, 1280], "length": 15622, "fps": 29.97002997002997}, "5DCo_bZ0PEo": {"res": [720, 1280], "length": 2249, "fps": 30.0}, "-HlGNm4D0NA": {"res": [1080, 1920], "length": 3479, "fps": 29.97002997002997}, "6RDO3AS9yzs": {"res": [1080, 1920], "length": 20134, "fps": 30.0}, "4px8Zvp6KKc": {"res": [480, 654], "length": 2805, "fps": 23.976023976023978}, "3j50-GpcSYU": {"res": [360, 640], "length": 6233, "fps": 29.97002997002997}, "1akHL1w-dg4": {"res": [1080, 1920], "length": 19073, "fps": 29.97002997002997}, "6iXbPZw4Xek": {"res": [360, 640], "length": 2117, "fps": 29.97002997002997}, "3oYUTZzpgqg": {"res": [1080, 1920], "length": 40708, "fps": 29.97002997002997}, "5ryiAfDbjD8": {"res": [1080, 1920], "length": 7709, "fps": 29.97002997002997}, "38_CaOQiLgk": {"res": [1080, 1920], "length": 1346, "fps": 29.97002997002997}, "35bR5TNTd2M": {"res": [720, 1152], "length": 12735, "fps": 29.715}, "69bd1R7yQYg": {"res": [1080, 1920], "length": 607, "fps": 23.976023976023978}, "0fZJVfJ2RBk": {"res": [1080, 1920], "length": 12935, "fps": 29.97002997002997}, "4krdBuFEMMA": {"res": [720, 1280], "length": 2954, "fps": 30.0}, "5w3294dX8Wk": {"res": [720, 1280], "length": 3244, "fps": 29.97002997002997}, "4Ln17L4iPaE": {"res": [720, 1280], "length": 1542, "fps": 29.97002997002997}, "20L1KcP9Zeo": {"res": [1080, 1920], "length": 20308, "fps": 30.0}, "5Z8lNPgCmuo": {"res": [720, 1280], "length": 829, "fps": 23.976023976023978}, "6aLr21Iza1U": {"res": [1080, 1920], "length": 1816, "fps": 30.0}, "72MBWdDPmSU": {"res": [720, 1280], "length": 12999, "fps": 29.97002997002997}, "4rWrNpTI7HE": {"res": [1080, 1920], "length": 16919, "fps": 29.97002997002997}, "1aJSOGb9gaA": {"res": [360, 640], "length": 5463, "fps": 29.97002997002997}, "3h4EC1PGnw8": {"res": [720, 1280], "length": 3355, "fps": 29.97}, "88zdP84jcpM": {"res": [720, 1280], "length": 6484, "fps": 25.0}, "6U20xf4WdwE": {"res": [1080, 1920], "length": 5300, "fps": 29.97002997002997}, 
"6bVE_fopErQ": {"res": [720, 1280], "length": 5770, "fps": 30.0}, "56Di7V4BHgw": {"res": [1080, 1920], "length": 13134, "fps": 29.97002997002997}, "3m2PIS0Asxg": {"res": [1080, 1920], "length": 9428, "fps": 29.97002997002997}, "1-saQGg__0E": {"res": [720, 1280], "length": 6428, "fps": 29.97002997002997}, "7MI6tU_D6eI": {"res": [1080, 1920], "length": 301, "fps": 29.97002997002997}, "2Sv6ZLHfz7s": {"res": [1080, 1920], "length": 14380, "fps": 30.0}, "-NaUu-UH5wQ": {"res": [1080, 1920], "length": 16012, "fps": 30.0}, "2vItFX_gYQE": {"res": [720, 1280], "length": 16132, "fps": 29.97002997002997}, "0KuJ2t4S_TY": {"res": [720, 1280], "length": 2410, "fps": 30.0}, "28IOz4eyBpU": {"res": [720, 1280], "length": 1223, "fps": 30.0}, "1k2s89desLs": {"res": [720, 1280], "length": 21181, "fps": 30.0}, "0ibHGUofRWQ": {"res": [720, 1280], "length": 26923, "fps": 30.0}, "54RqBmjdq9g": {"res": [360, 640], "length": 356, "fps": 29.97002997002997}, "1_eMgOadAWQ": {"res": [720, 1280], "length": 16844, "fps": 29.97002997002997}, "74CF5OotD2M": {"res": [1080, 1916], "length": 7021, "fps": 30.0}, "46FeExcSee4": {"res": [720, 1280], "length": 8333, "fps": 30.0}, "7J5MFYoqEZk": {"res": [720, 1280], "length": 3180, "fps": 24.078}, "3czV8z7v_6M": {"res": [1080, 1920], "length": 14097, "fps": 25.0}, "0mlwYkrlmGs": {"res": [480, 854], "length": 43360, "fps": 29.97002997002997}, "6eOtI9sKQj8": {"res": [720, 1280], "length": 7457, "fps": 30.0}, "2aQhjfDc_nw": {"res": [1080, 1920], "length": 13945, "fps": 30.0}, "3o2N8DXMD6s": {"res": [1080, 1920], "length": 2895, "fps": 29.97002997002997}, "5j_RYzaXUvo": {"res": [1080, 1920], "length": 11762, "fps": 29.97002997002997}, "5dsbpDoszc8": {"res": [1080, 1920], "length": 272, "fps": 29.97002997002997}, "5abEmGOTs2Y": {"res": [720, 1280], "length": 17889, "fps": 29.97002997002997}, "--pq96V-6DA": {"res": [1080, 1920], "length": 7354, "fps": 29.97002997002997}, "3h9xu5UF2CU": {"res": [480, 640], "length": 4717, "fps": 29.97002997002997}, "3yHjg_u2MjA": 
{"res": [720, 1280], "length": 4097, "fps": 23.976023976023978}, "-L3_lgFu2aw": {"res": [1080, 1920], "length": 15629, "fps": 29.97002997002997}, "6y4R_EYLT4A": {"res": [1080, 1920], "length": 14382, "fps": 23.976023976023978}, "7Ed0xWifFhk": {"res": [1080, 1920], "length": 13798, "fps": 29.97002997002997}, "4odaOC3oZio": {"res": [1080, 1920], "length": 14607, "fps": 25.0}, "6o8TQ9xQMKw": {"res": [720, 1280], "length": 5099, "fps": 29.97002997002997}, "1NsA3utS6To": {"res": [480, 640], "length": 5833, "fps": 29.97002997002997}, "83El0jvP6RU": {"res": [1080, 1920], "length": 10931, "fps": 30.0}, "50iETKggGHg": {"res": [1080, 1920], "length": 12814, "fps": 29.97002997002997}, "2ejWzp2faAA": {"res": [480, 640], "length": 10618, "fps": 29.97002997002997}, "0HzztPiFelY": {"res": [1080, 1920], "length": 21500, "fps": 23.976023976023978}, "2e-uo8_3lJ0": {"res": [1080, 1920], "length": 20151, "fps": 29.97002997002997}, "6_mArr1eOcY": {"res": [720, 1280], "length": 378, "fps": 30.0}, "5ez-0HvSchw": {"res": [720, 1280], "length": 21879, "fps": 30.0}, "3IlxESnsAxA": {"res": [480, 640], "length": 44180, "fps": 29.97002997002997}, "4-M3xo-BkAE": {"res": [720, 1280], "length": 15949, "fps": 29.97002997002997}, "6yQyyAsuTUc": {"res": [1080, 1920], "length": 323, "fps": 29.97002997002997}, "14X6oChjjT4": {"res": [1080, 1920], "length": 1743, "fps": 29.97002997002997}, "7-jgp1YRxE0": {"res": [1080, 1920], "length": 10945, "fps": 30.0}, "6S1uetXcEYY": {"res": [720, 1080], "length": 4362, "fps": 30.0}, "Jv5DjxNK4xI": {"res": [720, 1280], "length": 20197, "fps": 30.0}, "3ucoQx_3l_k": {"res": [1080, 1920], "length": 7418, "fps": 29.97002997002997}, "4ABoKW0KDLQ": {"res": [720, 1280], "length": 5555, "fps": 29.97002997002997}, "-B_KgSrmTEk": {"res": [1080, 1920], "length": 1233, "fps": 29.97002997002997}, "RtHZPZM5gtU": {"res": [720, 1280], "length": 10184, "fps": 23.976023976023978}, "22oJoUgnTdM": {"res": [720, 1080], "length": 1003, "fps": 17.152}, "6UkyFG5dw1s": {"res": [1080, 
1920], "length": 6607, "fps": 30.0}, "0udTXFEOTp8": {"res": [1080, 1920], "length": 5753, "fps": 23.976023976023978}, "1c1aGRdDYB8": {"res": [720, 1280], "length": 21235, "fps": 29.97002997002997}, "7DhwocwS5G4": {"res": [720, 1280], "length": 14397, "fps": 29.97002997002997}, "5oBpHPqwbSE": {"res": [1080, 1920], "length": 18419, "fps": 29.97002997002997}, "4Ll6DW1-A2k": {"res": [720, 1280], "length": 13231, "fps": 30.0}, "3njc7bhBUNw": {"res": [1080, 1920], "length": 8593, "fps": 30.0}, "3WLY88Swc_I": {"res": [1080, 1920], "length": 3532, "fps": 23.976023976023978}, "0JCeMZ4Q3XQ": {"res": [1080, 1920], "length": 25060, "fps": 30.0}, "-SQFIbscsAI": {"res": [1080, 1920], "length": 1465, "fps": 29.97002997002997}, "6Cfk45eDysw": {"res": [1080, 1920], "length": 3073, "fps": 29.97002997002997}, "7zo9ltH4lGk": {"res": [360, 640], "length": 1142, "fps": 30.0}, "-AkxhB4r_2Y": {"res": [720, 1280], "length": 3846, "fps": 23.976023976023978}, "UBj9H6z6Uxw": {"res": [720, 1280], "length": 5521, "fps": 25.0}, "0UR9cREKeUI": {"res": [720, 1280], "length": 3884, "fps": 30.0}, "7kZWu85AbSk": {"res": [1080, 1440], "length": 13255, "fps": 29.97002997002997}, "0jlHk5s3JR0": {"res": [1080, 1920], "length": 14527, "fps": 30.0}, "7LjxYvf-e6c": {"res": [1080, 1920], "length": 5641, "fps": 29.97002997002997}, "6IpM9X9eVvc": {"res": [1080, 1920], "length": 1721, "fps": 29.97002997002997}, "2udA--Kb2vo": {"res": [720, 1280], "length": 3163, "fps": 29.97002997002997}, "-M7CostYoIM": {"res": [1080, 1920], "length": 6839, "fps": 29.97002997002997}, "3pKwGJ55kYM": {"res": [1080, 1920], "length": 17316, "fps": 29.97002997002997}, "3dz9WRT4ovs": {"res": [1080, 1920], "length": 5192, "fps": 23.976023976023978}, "0JWnUGQrtIs": {"res": [1080, 1920], "length": 12929, "fps": 29.97002997002997}, "4dgbTPnuSh0": {"res": [1080, 1920], "length": 7136, "fps": 23.976023976023978}, "3fw1zsnDmbU": {"res": [1080, 608], "length": 12482, "fps": 29.97002997002997}, "5dVs413UwiQ": {"res": [1080, 1920], "length": 
18118, "fps": 23.976023976023978}, "4x8XQAPC4Vk3cugR5eQA5s": {"res": [1080, 1920], "length": 41978, "fps": 25.0}, "7X2_Vm9r4cs": {"res": [1080, 1920], "length": 13110, "fps": 30.0}, "2aHOCuMesGc": {"res": [352, 640], "length": 6294, "fps": 25.0}, "2udPD0NYRVo": {"res": [1080, 1920], "length": 20340, "fps": 29.97002997002997}, "4gkoOnKgEAY": {"res": [1080, 1920], "length": 16107, "fps": 23.976023976023978}, "-b8VzJIjbkw": {"res": [1080, 1920], "length": 5753, "fps": 23.976023976023978}, "5555bq_jvnU": {"res": [720, 1280], "length": 7908, "fps": 29.97002997002997}, "7623LuJiAdU": {"res": [720, 1280], "length": 2834, "fps": 29.97002997002997}, "0XtFVS_Kxno": {"res": [720, 1280], "length": 3787, "fps": 30.0}, "60cYTePWEOQ": {"res": [360, 540], "length": 9756, "fps": 29.97002997002997}, "750f5tzGBUc": {"res": [1080, 1920], "length": 6312, "fps": 29.97002997002997}, "7010Pzl3REc": {"res": [1080, 1920], "length": 6843, "fps": 29.97002997002997}, "0TGzKgxeNt4": {"res": [720, 1280], "length": 7216, "fps": 29.97002997002997}, "6QoJyYaYl0g": {"res": [720, 1280], "length": 1359, "fps": 29.97002997002997}, "3y1SKZYMphI": {"res": [720, 1280], "length": 1497, "fps": 30.0}, "6kUJ1Ifkh-E": {"res": [360, 640], "length": 1921, "fps": 30.0}, "3_y5dhzpILw": {"res": [720, 1152], "length": 91238, "fps": 25.0}, "4c_Behrl0ug": {"res": [720, 1280], "length": 5730, "fps": 29.97}, "3FImkVmElOQ": {"res": [720, 1280], "length": 809, "fps": 29.97002997002997}, "1ZByLiG8-3c": {"res": [1080, 1920], "length": 5440, "fps": 23.976023976023978}, "-Uqn0-D5WNk": {"res": [720, 1056], "length": 996, "fps": 29.97002997002997}, "3yQf9GjL5A8": {"res": [720, 1080], "length": 4242, "fps": 29.035}, "7E02KEPyxt0": {"res": [720, 1280], "length": 1480, "fps": 29.97002997002997}, "-384XKXNw_w": {"res": [1080, 1920], "length": 8115, "fps": 23.976023976023978}, "2My87Hqwxm0": {"res": [720, 1280], "length": 6046, "fps": 30.0}, "-Bxu9R-lmFc": {"res": [720, 1280], "length": 836, "fps": 29.97002997002997}, "7MdDUIvNbec": 
{"res": [1080, 608], "length": 826, "fps": 30.0}, "8-sswQ3Cw3A": {"res": [1080, 1920], "length": 3035, "fps": 25.0}, "1so4-zBOcbk": {"res": [1080, 1920], "length": 31509, "fps": 29.97002997002997}, "78PYzJd_6-I": {"res": [1080, 1920], "length": 1863, "fps": 29.97002997002997}, "6wN8EyQmaHM": {"res": [1080, 1920], "length": 7167, "fps": 29.97002997002997}, "SlksdQT5JRE": {"res": [720, 1080], "length": 946, "fps": 17.166666666666668}, "MC6xjO1JoR8": {"res": [720, 1280], "length": 6144, "fps": 23.976023976023978}, "4x-y1RBxqPw": {"res": [720, 1280], "length": 5660, "fps": 29.97002997002997}, "62bCk_LZVAY": {"res": [480, 640], "length": 1528, "fps": 30.0}, "2u9kx7Jlw7g": {"res": [720, 1280], "length": 4041, "fps": 30.0}, "2QCyl_nQHXo": {"res": [720, 1280], "length": 107796, "fps": 29.97}, "5BpIwpzT1wA": {"res": [1080, 1920], "length": 1792, "fps": 30.0}, "4ckqY82422o": {"res": [1080, 1920], "length": 29912, "fps": 30.0}, "2fxtI59FmNs": {"res": [1080, 1920], "length": 6769, "fps": 29.97002997002997}, "6mmsUCfyXuw": {"res": [720, 1280], "length": 6731, "fps": 30.0}, "3JH21s6kBoY": {"res": [720, 1280], "length": 2735, "fps": 30.0}, "4MsFYjC9HoI": {"res": [1080, 1920], "length": 1766, "fps": 30.0}, "3derzMnQLDY": {"res": [1080, 1920], "length": 4065, "fps": 29.97002997002997}, "2M83YRjnHWs": {"res": [720, 1280], "length": 9770, "fps": 29.97002997002997}, "5CbboaGLRaQ": {"res": [720, 1280], "length": 4927, "fps": 29.97002997002997}, "53-hCnr1Pk8": {"res": [1080, 1920], "length": 9501, "fps": 23.976023976023978}, "-4Vny515GWw": {"res": [720, 1280], "length": 1580, "fps": 30.0}, "5iRIeHAK9b0": {"res": [1080, 1920], "length": 12118, "fps": 30.0}, "5z2ucg2g0SE": {"res": [720, 1280], "length": 9579, "fps": 23.976023976023978}, "2hz6TDn6qSU": {"res": [480, 854], "length": 2228, "fps": 24.0}, "0KkGcaUZfLM": {"res": [720, 1280], "length": 11909, "fps": 30.0}, "3ML4x_33Mv4": {"res": [720, 1280], "length": 46066, "fps": 29.97002997002997}, "6Bcp3dNPqkk": {"res": [360, 480], "length": 
9460, "fps": 29.97002997002997}, "1ZrUru3Bov4": {"res": [1080, 1920], "length": 1666, "fps": 29.97002997002997}, "0OTrKoliOTc": {"res": [1080, 1920], "length": 18851, "fps": 29.97002997002997}, "7Yq3PG6vhWM": {"res": [480, 640], "length": 6121, "fps": 29.97002997002997}, "2crx1P7KT1s": {"res": [480, 640], "length": 3810, "fps": 30.0}, "0ozQhjCEq8g": {"res": [1080, 1920], "length": 14688, "fps": 29.97002997002997}, "3b8R8nQVzE8": {"res": [720, 1280], "length": 20269, "fps": 29.97002997002997}, "5t1nmidxKHY": {"res": [1080, 1920], "length": 2293, "fps": 29.97002997002997}, "2epZdTtZYig": {"res": [1080, 1920], "length": 12873, "fps": 29.97002997002997}, "79N7Tn2fDjM": {"res": [720, 1280], "length": 4151, "fps": 23.976023976023978}, "3gWIt6KUn2Y": {"res": [1080, 1080], "length": 7921, "fps": 29.97002997002997}, "2O6QUmPT0gM": {"res": [360, 640], "length": 9398, "fps": 29.97002997002997}, "88wlTGLWPgk": {"res": [1080, 1920], "length": 8022, "fps": 29.97002997002997}, "6jMFTzwklmk": {"res": [480, 632], "length": 107854, "fps": 29.97002997002997}, "1B4ZrxTcOBw": {"res": [1080, 1920], "length": 50128, "fps": 29.97002997002997}, "50muznl1uOY": {"res": [1080, 608], "length": 342, "fps": 30.0}, "0ElPjqY4Cq8": {"res": [1080, 608], "length": 726, "fps": 29.887}, "7ztoTdD13xM": {"res": [1080, 1920], "length": 1793, "fps": 29.97002997002997}, "4nzkodp42-8": {"res": [1080, 1920], "length": 18493, "fps": 29.97002997002997}, "1UE60O6dsiU": {"res": [720, 1280], "length": 36711, "fps": 29.97002997002997}, "2vrTA8pLu0w": {"res": [1080, 1920], "length": 2066, "fps": 30.0}, "2L2J7l3odjw": {"res": [1080, 1920], "length": 4047, "fps": 30.0}, "-GgW0Hrt6ow": {"res": [1080, 1920], "length": 13465, "fps": 29.97002997002997}, "4PEdbmHc2Xw": {"res": [480, 848], "length": 2016, "fps": 29.942}, "0FYqFEA9pTQ": {"res": [1920, 1080], "length": 16347, "fps": 30.0}, "6u2G586wS-o": {"res": [1080, 1920], "length": 23462, "fps": 29.97002997002997}, "6YHvAwZPXjo": {"res": [1080, 1920], "length": 1739, 
"fps": 29.97002997002997}, "3pGRATrswG0": {"res": [720, 1280], "length": 294, "fps": 30.0}, "1YkeunxfWDI": {"res": [360, 638], "length": 1582, "fps": 30.0}, "2Wh3TbK77Ec": {"res": [720, 1280], "length": 2465, "fps": 30.0}, "2ieEZatqP14": {"res": [720, 1280], "length": 6640, "fps": 29.97002997002997}, "-Dycay2gHZU": {"res": [1080, 1920], "length": 11329, "fps": 30.0}, "0fwZ-B8tOlo": {"res": [1080, 608], "length": 1398, "fps": 29.787}, "5OHnJzydoKI": {"res": [1080, 1920], "length": 17333, "fps": 29.97002997002997}, "6HPj3dbZR64": {"res": [720, 1280], "length": 9011, "fps": 30.0}, "3N0atcNismQ": {"res": [1080, 1920], "length": 1364, "fps": 29.97002997002997}, "4PxvE1Rc7mE": {"res": [1080, 1920], "length": 16364, "fps": 25.0}, "4IKRQSTErXw": {"res": [720, 1280], "length": 8606, "fps": 29.97}, "6R7auE0YG3E": {"res": [480, 640], "length": 3885, "fps": 29.97002997002997}, "6ezuklg6QcI": {"res": [720, 1080], "length": 499, "fps": 15.007533902561526}, "53-La6Leb9U": {"res": [1080, 1920], "length": 7784, "fps": 29.97002997002997}, "6DqdD7bbDhI": {"res": [720, 1280], "length": 11801, "fps": 30.0}, "0c_b0ZrIrxA": {"res": [1080, 1920], "length": 6213, "fps": 29.97002997002997}, "3--WciJ3-I4": {"res": [1080, 1920], "length": 5753, "fps": 23.976023976023978}, "4-4b4lJXvWE": {"res": [720, 1280], "length": 18234, "fps": 30.0}, "4PtPO1g4LXA": {"res": [1080, 1080], "length": 524, "fps": 29.94296577946768}, "2EUZtPTfMs4": {"res": [1080, 1920], "length": 1483, "fps": 29.97002997002997}, "4W-g21npdkg": {"res": [720, 1280], "length": 22179, "fps": 23.976023976023978}, "2qDnzhpeD18": {"res": [1080, 1920], "length": 2555, "fps": 29.97002997002997}, "xkgar0jaf1d": {"res": [1080, 1294], "length": 2842, "fps": 29.97002997002997}, "0bX29nB6Pe0": {"res": [720, 1280], "length": 2033, "fps": 30.0}, "-ZOyG_dW_1M": {"res": [1080, 1920], "length": 20945, "fps": 24.0}, "-UkcUmFj6ls": {"res": [1080, 1920], "length": 1916, "fps": 30.0}, "5TeY1R1UAmw": {"res": [360, 640], "length": 3031, "fps": 
29.97002997002997}, "68NfC8V2vbY": {"res": [720, 1280], "length": 4569, "fps": 30.0}, "5OQaJcyenPQ": {"res": [480, 854], "length": 2156, "fps": 29.97002997002997}, "-KgBBru7lps": {"res": [720, 1280], "length": 2569, "fps": 29.97002997002997}, "4zxsXJFD-Yc": {"res": [1080, 1920], "length": 5591, "fps": 29.97002997002997}, "3_b9cFm9HpM": {"res": [1080, 1920], "length": 4361, "fps": 24.0}, "1cT9kp1pI8A": {"res": [1080, 1920], "length": 24583, "fps": 23.976023976023978}, "3Hw3dbQHVTQ": {"res": [1080, 1920], "length": 5688, "fps": 29.97002997002997}, "-_ojvPJp0c0": {"res": [1080, 1920], "length": 2503, "fps": 29.97002997002997}, "6wqBCzGIUkc": {"res": [1080, 1920], "length": 4645, "fps": 23.976023976023978}, "5C-vRcATQGg": {"res": [720, 1280], "length": 7209, "fps": 29.97002997002997}, "15uD4UdvOGU": {"res": [1080, 1920], "length": 4474, "fps": 23.976023976023978}, "4saUnm634k0": {"res": [360, 640], "length": 7086, "fps": 23.976023976023978}, "1zB3R6tR5Ow": {"res": [720, 1280], "length": 29970, "fps": 30.0}, "3SZ1w2UTkqQ": {"res": [360, 640], "length": 8458, "fps": 29.97002997002997}, "689vmfYsFyk": {"res": [360, 640], "length": 9009, "fps": 29.97}, "2-8Ef9iHTe4": {"res": [720, 1280], "length": 16655, "fps": 30.0}, "-Fyp3iYp_Mo": {"res": [1080, 1920], "length": 1563, "fps": 28.52}, "2zA0YgKb5ug": {"res": [1080, 1920], "length": 1092, "fps": 29.97002997002997}, "54S9SzY3rC8": {"res": [1080, 1920], "length": 11956, "fps": 24.0}, "--6bmFM9wT4": {"res": [1080, 1920], "length": 18887, "fps": 29.97002997002997}, "6RwmiOsDKuM": {"res": [720, 1280], "length": 1014, "fps": 20.72}, "-XFkh8wlNUQ": {"res": [720, 1280], "length": 4553, "fps": 29.97002997002997}, "23tQHcy3VnI": {"res": [720, 1280], "length": 9194, "fps": 29.97002997002997}, "52JFNfg00D0": {"res": [1080, 1920], "length": 5977, "fps": 29.97002997002997}, "lhsEFO1I83s": {"res": [720, 1280], "length": 5993, "fps": 29.97}, "4bKBp0Ut14A": {"res": [480, 640], "length": 10743, "fps": 30.0}, "4gZ86yRcJcM": {"res": [454, 854], 
"length": 909, "fps": 29.871001031991746}, "-Tp_qLcc02c": {"res": [1080, 1920], "length": 78224, "fps": 29.97002997002997}, "5LuS-TTg7rA": {"res": [720, 1280], "length": 2294, "fps": 29.97002997002997}, "4ZUgccpo8Ww": {"res": [720, 1280], "length": 5307, "fps": 29.97002997002997}, "48tu-4L5rog": {"res": [480, 640], "length": 9005, "fps": 30.0}, "1eHR1Kopsus": {"res": [1080, 1920], "length": 3576, "fps": 29.97002997002997}, "3M2Q8yH652U": {"res": [1080, 1920], "length": 749, "fps": 23.976023976023978}, "4Qa2r-L4euE": {"res": [1080, 1920], "length": 6941, "fps": 25.0}, "1shDjV2kuYI": {"res": [1080, 1920], "length": 22427, "fps": 29.97002997002997}, "1ZQlKQsY0_0": {"res": [720, 1280], "length": 12635, "fps": 29.97002997002997}, "0vqnjAecNI4": {"res": [720, 1280], "length": 9260, "fps": 29.97002997002997}, "44U1RQK_pJg": {"res": [1080, 1920], "length": 702, "fps": 29.97002997002997}, "6WF9mjmxwv8": {"res": [720, 1280], "length": 2561, "fps": 29.97002997002997}, "1VGMYEzJHjM": {"res": [1080, 1920], "length": 1197, "fps": 30.0}, "25xI45bNg5E": {"res": [480, 640], "length": 13969, "fps": 29.848}, "3GXIBwgAYzQ": {"res": [720, 1280], "length": 4019, "fps": 29.97002997002997}, "4eTllnsMOUU": {"res": [360, 640], "length": 3368, "fps": 30.0}, "2QKVspjZM-w": {"res": [1080, 1920], "length": 3137, "fps": 29.97002997002997}, "6Ohv6_N88yA": {"res": [454, 854], "length": 540, "fps": 29.623700623700625}, "3KX4beTnCYA": {"res": [720, 1280], "length": 6378, "fps": 30.0}, "0Q3Ik0o3JuA": {"res": [1080, 1920], "length": 6596, "fps": 29.97002997002997}, "3Uhjd1gWESY": {"res": [1080, 1920], "length": 1623, "fps": 29.97002997002997}, "11bYjaf1gWc": {"res": [1080, 1920], "length": 9577, "fps": 29.97002997002997}, "-NWZsLFjA7A": {"res": [1080, 1920], "length": 16264, "fps": 30.0}, "35-h2dH-sf0": {"res": [720, 1280], "length": 5065, "fps": 29.97002997002997}, "0rq4nkWlO2E": {"res": [360, 640], "length": 728, "fps": 29.97002997002997}, "69F9hv4AIIY": {"res": [480, 640], "length": 2918, "fps": 
29.97002997002997}, "3HwFRZYTivI": {"res": [480, 640], "length": 1814, "fps": 29.97002997002997}, "79in9ZSvhR0": {"res": [720, 1280], "length": 40891, "fps": 29.97002997002997}, "3I8RmyajZ0M": {"res": [1080, 1920], "length": 1125, "fps": 23.976023976023978}, "4yQPADthGbs": {"res": [1080, 1920], "length": 14112, "fps": 23.976023976023978}, "20Qt7rkcAY0": {"res": [1080, 1920], "length": 1581, "fps": 29.97002997002997}, "3kr0l3g1zv4": {"res": [720, 1280], "length": 1625, "fps": 24.0}, "28d21FpzwpM": {"res": [720, 1280], "length": 1047, "fps": 29.97002997002997}, "36HZE61wfS0": {"res": [1080, 1920], "length": 735, "fps": 29.97002997002997}, "1cZAKuFpckI": {"res": [1080, 1920], "length": 490, "fps": 30.0}, "koOvrKENdH8": {"res": [1080, 1920], "length": 2786, "fps": 29.97002997002997}, "5CwLVXIScNg": {"res": [1080, 1920], "length": 7274, "fps": 30.0}, "1yKuwUSJXjQ": {"res": [1080, 608], "length": 25289, "fps": 29.878}, "3JGY9iG_hwA": {"res": [720, 1280], "length": 19987, "fps": 30.0}, "27mbVu4v49w": {"res": [720, 1280], "length": 4100, "fps": 30.0}, "7KRfSa2aVXw": {"res": [480, 640], "length": 1184, "fps": 9.0}, "35N28rtz66I": {"res": [1080, 1920], "length": 20553, "fps": 30.0}, "1ky1L6jzdAQ": {"res": [1080, 1920], "length": 30245, "fps": 30.0}, "6FFs2jMg3Qk": {"res": [1080, 608], "length": 927, "fps": 29.0}, "4uYuzZSGl1w": {"res": [720, 1280], "length": 6104, "fps": 29.97002997002997}, "7_E5HtKvX94": {"res": [1080, 1920], "length": 13200, "fps": 30.0}, "0hGaPpGSvmc": {"res": [720, 1280], "length": 6745, "fps": 25.0}, "5udtD0-KjlA": {"res": [480, 854], "length": 7637, "fps": 29.97002997002997}, "41ZdIWjZilo": {"res": [1080, 1920], "length": 17781, "fps": 29.97002997002997}, "67C8WkVF2gM": {"res": [1080, 1920], "length": 2741, "fps": 29.97002997002997}, "7Mmse6ucEno": {"res": [720, 1280], "length": 6630, "fps": 30.0}, "7-uJbyWnwfU": {"res": [1080, 1920], "length": 9448, "fps": 29.97002997002997}, "6BTqHNQPpsU": {"res": [1080, 608], "length": 3320, "fps": 30.0}, 
"6ibLm_CpHbY": {"res": [720, 1280], "length": 7453, "fps": 29.97002997002997}, "vQsycsq1ib8": {"res": [720, 1280], "length": 7835, "fps": 23.976023976023978}, "0sJIlX1BvBc": {"res": [1080, 1920], "length": 13531, "fps": 29.97002997002997}, "1ZudkF3P6t0": {"res": [1080, 1920], "length": 8358, "fps": 29.97002997002997}, "756s3Ex0HBc": {"res": [720, 1280], "length": 10892, "fps": 29.97002997002997}, "-TtP7XW613w": {"res": [720, 1280], "length": 2840, "fps": 29.97002997002997}, "1RHbfw12FPE": {"res": [1280, 720], "length": 6021, "fps": 30.0}, "5Xoby_UY_bM": {"res": [720, 1280], "length": 10672, "fps": 30.0}, "2DGjhMDkwTk": {"res": [720, 1280], "length": 8919, "fps": 30.0}, "7P5LkLmvkXY": {"res": [480, 854], "length": 6135, "fps": 29.771001150747985}, "2hxTkuo6b9Q": {"res": [360, 640], "length": 635, "fps": 20.0}, "4qREV2Ggyjs": {"res": [1080, 1920], "length": 4047, "fps": 23.976023976023978}, "7ngVUJlUUb0": {"res": [720, 1280], "length": 8130, "fps": 29.97002997002997}, "7owiXmBxZpk": {"res": [1080, 1920], "length": 9405, "fps": 23.976023976023978}, "1Rl908nVWlY": {"res": [1080, 1920], "length": 9338, "fps": 29.97002997002997}, "1x-8S-NWWzo": {"res": [1080, 1920], "length": 6292, "fps": 23.976023976023978}, "3jMyATzSm_k": {"res": [1080, 1920], "length": 4024, "fps": 30.0}, "75iciNFxEsM": {"res": [720, 1280], "length": 3203, "fps": 29.97002997002997}, "20nY5jgoY8w": {"res": [1080, 1920], "length": 868, "fps": 29.97002997002997}, "6aBG1FmV5DM": {"res": [720, 1280], "length": 20614, "fps": 29.97002997002997}, "0xLinXeAep8": {"res": [480, 854], "length": 5561, "fps": 29.97002997002997}, "1SfQim8avcU": {"res": [1080, 1920], "length": 18409, "fps": 30.0}, "7uqGLK2WvUY": {"res": [720, 1280], "length": 6297, "fps": 29.97002997002997}, "1W9pXUryFvY": {"res": [1080, 1920], "length": 2316, "fps": 29.97002997002997}, "7OBsKgNMT_8": {"res": [1080, 1920], "length": 1181, "fps": 23.976023976023978}, "2QB-YdxXSMY": {"res": [720, 1280], "length": 2435, "fps": 30.0}, "7G-741kjo0A": 
{"res": [720, 1280], "length": 1729, "fps": 25.0}, "3ZqyH6907T8": {"res": [1080, 1920], "length": 1218, "fps": 29.97002997002997}, "5Tiq42RvEJo": {"res": [720, 1280], "length": 7158, "fps": 29.97002997002997}, "6C6NLUHDKHs": {"res": [1080, 1920], "length": 1947, "fps": 29.97002997002997}, "2qLS_mXdrh8": {"res": [720, 1280], "length": 6914, "fps": 29.97002997002997}, "1ORJ2LYyjtg": {"res": [1080, 1920], "length": 9941, "fps": 23.976023976023978}, "4THwQJrqWcs": {"res": [1080, 1920], "length": 10473, "fps": 23.976023976023978}, "1CfBoWJ8kAY": {"res": [1080, 1920], "length": 15164, "fps": 29.97002997002997}, "3DizOjlwHng": {"res": [360, 640], "length": 2239, "fps": 25.0}, "4g1KM2HIZUA": {"res": [720, 1280], "length": 4289, "fps": 29.97002997002997}, "1t3EJprOWEw": {"res": [1080, 1920], "length": 3787, "fps": 30.0}, "0zv14ho2z2U": {"res": [1080, 1920], "length": 300, "fps": 29.97002997002997}, "-Zrf6jWiFZs": {"res": [720, 1280], "length": 9212, "fps": 29.97002997002997}, "2xqV9Ttjmmg": {"res": [1080, 1920], "length": 1508, "fps": 29.97002997002997}, "1FbKP8vZvdM": {"res": [1080, 1920], "length": 19838, "fps": 29.97002997002997}, "0XuYL6bKxa8": {"res": [1080, 1920], "length": 2126, "fps": 23.976023976023978}, "3xGoX0gMhnU": {"res": [1080, 1920], "length": 17799, "fps": 29.97002997002997}, "3oFYnCrBfaU": {"res": [480, 640], "length": 15345, "fps": 29.97002997002997}, "6O7xQpT3As4": {"res": [720, 1280], "length": 1589, "fps": 30.0}, "1Do7AfsaVeI": {"res": [1080, 1920], "length": 1744, "fps": 30.0}, "5DolK2hufSM": {"res": [1080, 1920], "length": 28639, "fps": 23.976023976023978}, "31sHy2-FC70": {"res": [1080, 1920], "length": 35986, "fps": 24.0}, "3Iu3rAesooU": {"res": [1080, 1920], "length": 4364, "fps": 25.0}, "4S2hd_PXStQ": {"res": [1080, 1920], "length": 1960, "fps": 29.97002997002997}, "33brDkaix3A": {"res": [1080, 1920], "length": 2862, "fps": 29.97002997002997}, "7f96j98SBos": {"res": [1080, 1920], "length": 13993, "fps": 29.97002997002997}, "-URViDyH9U0": {"res": 
[720, 1280], "length": 1166, "fps": 30.0}, "-JL59Zq5vPY": {"res": [720, 1280], "length": 14361, "fps": 29.97002997002997}, "32oTN0XBKSA": {"res": [1080, 1920], "length": 11239, "fps": 29.97002997002997}, "7fVAl0IGt4M": {"res": [720, 1280], "length": 239, "fps": 23.976023976023978}, "6AlXgGoCUug": {"res": [720, 1280], "length": 5911, "fps": 29.97}, "7gryEPTAHG4": {"res": [720, 1280], "length": 11735, "fps": 29.85}, "7cFJnTx2xeA": {"res": [720, 1280], "length": 12839, "fps": 17.599}, "7zvqwTMTRfQ": {"res": [1080, 1920], "length": 6915, "fps": 29.97002997002997}, "18uLbZs9g3k": {"res": [720, 1280], "length": 11087, "fps": 30.0}, "6KGxITv68XA": {"res": [720, 1280], "length": 431, "fps": 30.0}, "4p-fzZWqY8I": {"res": [360, 640], "length": 6960, "fps": 30.0}, "5evhc3T_Wdc": {"res": [720, 1280], "length": 7747, "fps": 30.0}, "11qYnjz3sfo": {"res": [480, 640], "length": 1587, "fps": 29.97002997002997}, "-0VIwubCjpM": {"res": [1080, 1920], "length": 2264, "fps": 29.97002997002997}, "6ID_GzmmHiI": {"res": [1080, 1920], "length": 15124, "fps": 29.97002997002997}, "3oOzZryjCLM": {"res": [720, 1280], "length": 3537, "fps": 29.97}, "6_Ck2G58R_U": {"res": [720, 1280], "length": 3774, "fps": 30.0}, "7JR-Z3DB7dY": {"res": [1080, 1920], "length": 5753, "fps": 23.976023976023978}, "3DdyXFmj6kg": {"res": [480, 854], "length": 12825, "fps": 29.97002997002997}, "1sPp3Oqs_Oc": {"res": [360, 640], "length": 3408, "fps": 25.0}, "5HRMnHWLUro": {"res": [720, 1280], "length": 8917, "fps": 29.97002997002997}, "226CKUNss6k": {"res": [720, 1280], "length": 3561, "fps": 30.0}, "2KPB9UJDEUU": {"res": [1080, 608], "length": 1344, "fps": 30.0}, "0ivltlK21Ns": {"res": [1080, 1920], "length": 29233, "fps": 23.976023976023978}, "5yNCsWu2S-c": {"res": [480, 854], "length": 2806, "fps": 29.97002997002997}, "7OVCgCEuf1o": {"res": [1080, 1920], "length": 8200, "fps": 25.0}, "6W38H2Tmixs": {"res": [720, 1280], "length": 1909, "fps": 29.97002997002997}, "51b6A1NYEjw": {"res": [720, 1280], "length": 5564, 
"fps": 29.97002997002997}, "0YR_i6PVM5o": {"res": [720, 1280], "length": 6218, "fps": 29.97002997002997}, "7gCDYekQAmc": {"res": [480, 854], "length": 11517, "fps": 30.0}, "7BQa7_fr24I": {"res": [720, 1280], "length": 7610, "fps": 29.97002997002997}, "2K6sNH3QdM4": {"res": [1080, 1920], "length": 25965, "fps": 23.976023976023978}, "6fjvNHU3utk": {"res": [720, 1280], "length": 15180, "fps": 29.97}, "6DAmc4GJ2Kw": {"res": [1080, 1920], "length": 9515, "fps": 23.976023976023978}, "1MuENBUZYtc": {"res": [1080, 1920], "length": 14805, "fps": 29.97002997002997}, "75RIqb2TUME": {"res": [1080, 1920], "length": 1575, "fps": 29.767001114827202}, "1mrADzwE4Ho": {"res": [720, 1280], "length": 2513, "fps": 29.97}, "2rUWPvdAVMw": {"res": [1080, 1920], "length": 10331, "fps": 23.976023976023978}, "39FygidlZio": {"res": [720, 1280], "length": 6127, "fps": 30.0}, "778IkUV2mxw": {"res": [1012, 1920], "length": 3239, "fps": 23.976023976023978}, "2NJXip1avxY": {"res": [1080, 1920], "length": 5023, "fps": 30.0}, "1yycWziALCE": {"res": [720, 1280], "length": 14847, "fps": 30.0}, "3OOGFTrAle0": {"res": [720, 1280], "length": 8855, "fps": 29.97}, "-P-luyYjEbg": {"res": [1080, 1920], "length": 1225, "fps": 29.97002997002997}, "4yIKyUws_FM": {"res": [480, 640], "length": 1594, "fps": 29.97002997002997}, "5_k1TypH_s8": {"res": [720, 1280], "length": 20948, "fps": 30.0}, "4K2k1galkMU": {"res": [720, 1280], "length": 1632, "fps": 29.97002997002997}, "0qb8BQ3lPb4": {"res": [480, 640], "length": 8056, "fps": 29.88}, "6KwcF1SxswU": {"res": [720, 1280], "length": 2616, "fps": 29.97002997002997}, "38P2wuD5bBg": {"res": [720, 1280], "length": 2126, "fps": 29.831619537275063}, "1qyp7Io-qq0": {"res": [1080, 1920], "length": 8261, "fps": 29.97002997002997}, "5guTB1PtB7I": {"res": [720, 1280], "length": 8318, "fps": 29.97}, "0bIQ-0ExK9c": {"res": [720, 1280], "length": 15521, "fps": 29.97002997002997}, "4eNt91uV02o": {"res": [480, 854], "length": 1662, "fps": 29.97002997002997}, "4NqvQVEyqX4": {"res": 
[1080, 1920], "length": 1417, "fps": 25.0}, "1hQ3N5wsyeI": {"res": [360, 640], "length": 5122, "fps": 30.0}, "6GzmwiLo5X8": {"res": [720, 1280], "length": 5494, "fps": 30.0}, "3h6Izx48g3k": {"res": [1080, 1920], "length": 3426, "fps": 25.0}, "2uqfhF-Of_U": {"res": [720, 1280], "length": 10961, "fps": 29.97002997002997}, "0fkFHKeQnTE": {"res": [720, 1280], "length": 11258, "fps": 29.97002997002997}, "3IN1R9rs45k": {"res": [720, 1280], "length": 4464, "fps": 24.078}, "3FxpOFlPYQU": {"res": [360, 640], "length": 4668, "fps": 30.0}, "16FY4shEniw": {"res": [1080, 1920], "length": 3520, "fps": 23.976023976023978}, "7TAfOplFDGs": {"res": [720, 1280], "length": 2416, "fps": 29.97002997002997}, "3WB3Ca1ocq8": {"res": [1080, 1920], "length": 14318, "fps": 30.0}, "5ELtUAtflns": {"res": [1080, 1920], "length": 523, "fps": 29.97002997002997}, "1qrxdNZgyiU": {"res": [720, 1280], "length": 6381, "fps": 29.97002997002997}, "7GLXUu1NgsE": {"res": [1080, 1920], "length": 27060, "fps": 30.0}, "5Tt8PNI1bpc": {"res": [720, 1280], "length": 1327, "fps": 29.954853273137697}, "32uDIDeinnY": {"res": [1080, 1920], "length": 6726, "fps": 25.0}, "4T1oN1flsBI": {"res": [720, 1280], "length": 9869, "fps": 23.976023976023978}, "791GHmxK14k": {"res": [1080, 1920], "length": 11168, "fps": 23.976023976023978}, "-PReNU3L8Lc": {"res": [720, 1280], "length": 2007, "fps": 30.0}, "0uSvqu1JBrQ": {"res": [1080, 1920], "length": 1722, "fps": 30.0}, "4Up1W41Xyfg": {"res": [720, 1080], "length": 714, "fps": 17.0}, "4Z2vZZOY7x4": {"res": [1080, 1920], "length": 616, "fps": 30.0}, "5tlBrcZz5-g": {"res": [1080, 1920], "length": 574, "fps": 23.976023976023978}, "1J11MLT62oI": {"res": [1080, 1920], "length": 8863, "fps": 30.0}, "4I8qR7MPOFI": {"res": [360, 640], "length": 2705, "fps": 30.0}, "3poB5VELd34": {"res": [1080, 1920], "length": 17380, "fps": 29.97002997002997}, "-Zl-cALyuy8": {"res": [1080, 1920], "length": 19610, "fps": 29.97002997002997}, "50cuZcvlcDk": {"res": [1080, 1920], "length": 4459, "fps": 
23.976023976023978}, "1IMJXby62gg": {"res": [480, 854], "length": 1306, "fps": 29.97002997002997}, "2Wsw0KI-yQg": {"res": [1080, 1920], "length": 29279, "fps": 24.0}, "1Nf5NScNUJo": {"res": [1080, 1920], "length": 5100, "fps": 29.97002997002997}, "0qtZASvLIak": {"res": [1080, 1920], "length": 6529, "fps": 29.97002997002997}, "4DDLH3SlvTM": {"res": [720, 1280], "length": 3290, "fps": 29.97002997002997}, "0xdOLtyqA5o": {"res": [480, 640], "length": 2107, "fps": 29.97002997002997}, "-HV6JF-7Suc": {"res": [720, 1280], "length": 5194, "fps": 29.97002997002997}, "0IOv-YuOTb0": {"res": [1080, 1920], "length": 285, "fps": 29.97002997002997}, "2Tgl8gxz_HA": {"res": [720, 1280], "length": 3664, "fps": 29.97002997002997}, "1NNpTjkA-Vc": {"res": [1080, 1920], "length": 5753, "fps": 23.976023976023978}, "3-cl4u7Cy3w": {"res": [720, 1280], "length": 24187, "fps": 30.0}, "237k5BL8gu0": {"res": [1080, 1920], "length": 4092, "fps": 30.0}, "4vfuSkczIcI": {"res": [1080, 1920], "length": 7707, "fps": 29.97002997002997}, "2uCE3KqKwG8": {"res": [360, 640], "length": 9784, "fps": 30.0}, "11gzqTBlm5Q": {"res": [480, 854], "length": 4280, "fps": 29.97002997002997}, "2CBMZcrvOdA": {"res": [1080, 1920], "length": 2283, "fps": 29.97002997002997}, "4ZyG8I6BhPA": {"res": [720, 1280], "length": 22826, "fps": 30.0}, "0l_YxGEsYDg": {"res": [720, 1280], "length": 19526, "fps": 30.0}, "6YRIw7EFtwA": {"res": [720, 1080], "length": 2806, "fps": 30.0}, "-OtYFXHq9S0": {"res": [1080, 1920], "length": 2823, "fps": 29.97002997002997}, "6NddBOft2TQ": {"res": [360, 640], "length": 22516, "fps": 29.97}, "4__R2jLPocE": {"res": [1080, 1920], "length": 6917, "fps": 29.97002997002997}, "4NqquchUdXc": {"res": [480, 640], "length": 5748, "fps": 29.97002997002997}, "-XA4cUQhpwQ": {"res": [1080, 1920], "length": 9789, "fps": 23.976023976023978}, "5EY2MMBprFc": {"res": [454, 854], "length": 464, "fps": 29.142857142857142}, "2XGXHraw8IM": {"res": [1080, 1920], "length": 1197, "fps": 29.97002997002997}, "0TyvE4zINHM": 
{"res": [720, 1280], "length": 1363, "fps": 29.97002997002997}, "4Fe7_ieumkE": {"res": [720, 1280], "length": 2164, "fps": 29.97002997002997}, "5kpJiAa0K50": {"res": [1080, 1920], "length": 14566, "fps": 25.0}, "6T4SVukmRfM": {"res": [480, 640], "length": 3394, "fps": 29.97002997002997}, "5_kA8RIsUjY": {"res": [720, 1280], "length": 2271, "fps": 23.976023976023978}, "5CmHRhBa16c": {"res": [480, 640], "length": 5285, "fps": 30.0}, "41HO4Sv9o1E": {"res": [1080, 1920], "length": 19159, "fps": 29.97002997002997}, "7TfLTF5NGrw": {"res": [1080, 1920], "length": 7542, "fps": 29.97002997002997}, "0ozA6CspdcQ": {"res": [1080, 1920], "length": 18682, "fps": 29.97002997002997}, "5F8L1mWySQY": {"res": [1080, 1920], "length": 20366, "fps": 29.97002997002997}, "-UMxbwKKx0g": {"res": [720, 1280], "length": 7527, "fps": 29.97002997002997}, "-4BE5VUpsjE": {"res": [480, 654], "length": 3938, "fps": 29.97002997002997}, "4m2eE_yGmek": {"res": [1080, 1920], "length": 9354, "fps": 25.0}, "7_PkG5n9d_o": {"res": [1080, 1920], "length": 6755, "fps": 29.97002997002997}, "7eEYUNL9c3s": {"res": [720, 1280], "length": 3966, "fps": 29.97002997002997}, "360Rh2nWAJc": {"res": [1080, 1920], "length": 3901, "fps": 29.97002997002997}, "5CA7kIQbLN8": {"res": [1080, 1920], "length": 3060, "fps": 29.97002997002997}, "7xnD31Kv66s": {"res": [1080, 1920], "length": 1471, "fps": 29.775}, "27NNuHzlhmI": {"res": [720, 1280], "length": 2757, "fps": 29.97002997002997}, "4OaYHvsqeSw": {"res": [356, 640], "length": 8775, "fps": 29.97002997002997}, "7vouCzNfICY": {"res": [1080, 1920], "length": 5753, "fps": 23.976023976023978}, "3Vg-kKfUHzI": {"res": [1080, 1920], "length": 17719, "fps": 29.97002997002997}, "0zIK6G6DdTI": {"res": [1080, 1920], "length": 22422, "fps": 29.97002997002997}, "0yd0yQX7HbE": {"res": [1080, 1920], "length": 8462, "fps": 29.97002997002997}, "7l0FsFQj-MA": {"res": [480, 654], "length": 19071, "fps": 23.976023976023978}, "3BLLfcJPXZI": {"res": [720, 1080], "length": 5243, "fps": 30.0}, 
"-2ix-iOwouI": {"res": [1080, 1920], "length": 7494, "fps": 30.0}, "0lFE6N7UKUQ": {"res": [1080, 608], "length": 12560, "fps": 29.97002997002997}, "7_1niIcqc-I": {"res": [720, 1280], "length": 2988, "fps": 29.97}, "4cqnw_Nqp78": {"res": [1080, 1920], "length": 1278, "fps": 29.97002997002997}, "4YIYRu2PERg": {"res": [1080, 1920], "length": 7377, "fps": 29.97002997002997}, "3gtOMI7t5kY": {"res": [720, 1280], "length": 523, "fps": 29.97002997002997}, "6ojK5S2q7kQ": {"res": [1080, 1080], "length": 2707, "fps": 23.976023976023978}, "1C6E-HFQE30": {"res": [1080, 1920], "length": 8723, "fps": 23.976023976023978}, "0FXHt0ifSvM": {"res": [720, 1280], "length": 11169, "fps": 29.97}, "2sGQuduhAf4": {"res": [1080, 1920], "length": 6847, "fps": 29.97002997002997}, "1rqPKHu84ZM": {"res": [1080, 1920], "length": 1895, "fps": 19.98001998001998}, "1d4h-XvO4Ik": {"res": [480, 640], "length": 7608, "fps": 30.0}, "0VsyxGx1-8Y": {"res": [480, 854], "length": 5591, "fps": 29.97002997002997}, "53RV_A340Ng": {"res": [1080, 608], "length": 6420, "fps": 30.0}, "0RFVhML8EWc": {"res": [1080, 1920], "length": 16590, "fps": 30.0}, "-IpTsfnThMc": {"res": [720, 1280], "length": 1346, "fps": 29.97}, "1-G5t0JX_kY": {"res": [720, 1280], "length": 5431, "fps": 29.97002997002997}, "4u2JoBoGgVE": {"res": [1080, 1920], "length": 17590, "fps": 25.0}, "2hjV5sd9uoc": {"res": [720, 1280], "length": 4354, "fps": 24.0}, "2ElgTUIWRCI": {"res": [720, 1280], "length": 2643, "fps": 29.97002997002997}, "25MZKXnMWQs": {"res": [720, 1280], "length": 11395, "fps": 30.0}, "88f52dxKnyU": {"res": [1080, 1920], "length": 2625, "fps": 23.976023976023978}, "59cfb28PcEo": {"res": [1080, 1920], "length": 3603, "fps": 30.0}, "68zIPtkTBG8": {"res": [1080, 1920], "length": 3009, "fps": 23.976023976023978}, "66VDsxPC_EU": {"res": [720, 1280], "length": 13168, "fps": 29.97002997002997}, "28ZEVTynFMg": {"res": [720, 1280], "length": 6001, "fps": 29.97002997002997}, "197wXR66szM": {"res": [720, 1280], "length": 7625, "fps": 30.0}, 
"7M9LKpbbm4U": {"res": [720, 1280], "length": 4220, "fps": 30.0}, "6pTpjuf_75U": {"res": [720, 1280], "length": 1541, "fps": 29.97002997002997}, "7FuLN4IsWgU": {"res": [720, 1280], "length": 4077, "fps": 29.97002997002997}, "1OyluMpqBxg": {"res": [1080, 1920], "length": 2140, "fps": 23.976023976023978}, "5yi5kIPV6I8": {"res": [1080, 1920], "length": 10180, "fps": 23.976023976023978}, "2V5Gr96tJPU": {"res": [1080, 1920], "length": 2855, "fps": 29.97002997002997}, "4qLOKzfa-SA": {"res": [720, 1280], "length": 1478, "fps": 29.97002997002997}, "74_FA5oD8ZY": {"res": [1080, 1920], "length": 2038, "fps": 23.976023976023978}, "6rFKaoMJwq0": {"res": [720, 1280], "length": 2701, "fps": 24.0}, "3W8pr0tiijs": {"res": [720, 1280], "length": 6928, "fps": 29.97002997002997}, "2D43l5lh20k": {"res": [480, 854], "length": 11246, "fps": 29.97002997002997}, "2ITtrEoHXt0": {"res": [1080, 1920], "length": 19344, "fps": 30.0}, "1zUrig_s5pU": {"res": [1080, 1920], "length": 4913, "fps": 23.976023976023978}, "1PM8NOrXLMc": {"res": [1080, 1920], "length": 5177, "fps": 25.0}, "0Pp2AcPbHnw": {"res": [1080, 1920], "length": 6124, "fps": 29.97002997002997}, "5lC0wRPioPY": {"res": [720, 1280], "length": 4929, "fps": 29.97002997002997}, "4gyIuBIdqug": {"res": [720, 1280], "length": 18135, "fps": 30.0}, "2Vi5CJMgJMk": {"res": [720, 1280], "length": 4778, "fps": 29.97002997002997}, "3rIjxXrLDLM": {"res": [1080, 1920], "length": 942, "fps": 29.97002997002997}, "2jS6r1T92Rs": {"res": [1080, 1920], "length": 10225, "fps": 29.97002997002997}, "6hamX26lB1E": {"res": [720, 1280], "length": 6246, "fps": 29.338}, "3kK7lt2QG-0": {"res": [480, 640], "length": 10320, "fps": 30.0}, "5gFYtIKARVg": {"res": [1080, 1920], "length": 3931, "fps": 29.97002997002997}, "3vDdDt3iiUw": {"res": [720, 1280], "length": 19818, "fps": 30.0}, "288uT_Z4DYY": {"res": [480, 640], "length": 2877, "fps": 30.0}, "4SVP0ShrpN0": {"res": [1080, 1920], "length": 8607, "fps": 29.97002997002997}, "-KyL6AuA-OY": {"res": [720, 1280], 
"length": 5701, "fps": 29.97002997002997}, "7ZLktACfQ_w": {"res": [1080, 1920], "length": 4367, "fps": 29.97002997002997}, "35TDOiOf70s": {"res": [720, 1080], "length": 4329, "fps": 29.97002997002997}, "3kqxBxvrnDI": {"res": [720, 1280], "length": 15699, "fps": 29.97002997002997}, "0oJp4HiGlqw": {"res": [360, 640], "length": 7198, "fps": 29.97}, "3CkCXFkAlTU": {"res": [720, 1280], "length": 3944, "fps": 15.0}, "28fU5WIzmdw": {"res": [1080, 1920], "length": 5411, "fps": 29.97002997002997}, "6qkP0YHxv1Y": {"res": [720, 1280], "length": 9993, "fps": 30.0}, "3DQO_pUD8AU": {"res": [1080, 1920], "length": 13374, "fps": 29.97002997002997}, "668bu_vGg7M": {"res": [1080, 1920], "length": 7903, "fps": 29.97002997002997}, "7mPRfXp5Zg4": {"res": [1080, 1920], "length": 2155, "fps": 30.0}, "4i0Y_mFJejI": {"res": [1080, 1920], "length": 12122, "fps": 29.97002997002997}, "-9aGqJpaN7c": {"res": [720, 1280], "length": 356, "fps": 30.0}, "16yp4zWo9Jo": {"res": [720, 1280], "length": 2728, "fps": 29.97002997002997}, "7-MY3ZvAU_4": {"res": [720, 1280], "length": 11082, "fps": 30.0}, "7rcbarZ0Z-g": {"res": [720, 1280], "length": 6509, "fps": 29.97002997002997}, "-QIt5kz85fk": {"res": [720, 1280], "length": 4711, "fps": 29.97002997002997}, "61yO6Y8dgXw": {"res": [1080, 1920], "length": 5915, "fps": 29.97002997002997}, "4COOEoCrd2A": {"res": [1080, 608], "length": 496, "fps": 30.0}, "1hMbLtmDzgg": {"res": [1080, 1920], "length": 6285, "fps": 29.97002997002997}, "6BnYsIZu6Fs": {"res": [1080, 1920], "length": 8927, "fps": 23.976023976023978}, "7TQB29PBDac": {"res": [720, 1280], "length": 3890, "fps": 30.0}, "6uZac3anNM0": {"res": [1080, 1920], "length": 5811, "fps": 29.97002997002997}, "kHdu1COtuyU": {"res": [720, 1280], "length": 1476, "fps": 25.0}, "1xUM9uqsexQ": {"res": [1080, 1920], "length": 9747, "fps": 23.976023976023978}, "72DGd2dVFto": {"res": [1080, 1920], "length": 1914, "fps": 29.97002997002997}, "2U-Kx9ipG7w": {"res": [720, 1280], "length": 1360, "fps": 30.0}, "5c9HcCzoD9g": 
{"res": [720, 1280], "length": 1054, "fps": 30.0}, "69MfuRgP9i8": {"res": [1080, 1920], "length": 7677, "fps": 29.97002997002997}, "MomSITt84wY": {"res": [720, 1280], "length": 4599, "fps": 25.0}, "-MuhF_l4zJA": {"res": [720, 1280], "length": 1411, "fps": 23.976023976023978}, "dIjHxptKWRI": {"res": [720, 1280], "length": 19508, "fps": 30.0}, "3i_qlSgSOmg": {"res": [720, 1280], "length": 2815, "fps": 29.97002997002997}, "3EFxi5bVkLw": {"res": [1080, 1920], "length": 2750, "fps": 29.97002997002997}, "66DfOiRRnVI": {"res": [1080, 1920], "length": 2550, "fps": 23.976023976023978}, "4bLd9NNVSqk": {"res": [720, 1280], "length": 989, "fps": 29.97002997002997}, "2n2GpS3HlCw": {"res": [1080, 1920], "length": 4879, "fps": 15.0}, "7OiURbHu65U": {"res": [720, 1280], "length": 5213, "fps": 29.97002997002997}, "2ApWoBjni40": {"res": [1080, 1920], "length": 2542, "fps": 29.97002997002997}, "2DMDODY3SXo": {"res": [1080, 1920], "length": 6857, "fps": 30.0}, "39WUKURehFM": {"res": [1080, 1920], "length": 23619, "fps": 29.97002997002997}, "1nOjFG9hRwk": {"res": [1080, 1920], "length": 1792, "fps": 29.97002997002997}, "3Rge35dKOfo": {"res": [360, 640], "length": 1688, "fps": 29.97002997002997}, "3ETlr8Ybsaw": {"res": [720, 1280], "length": 761, "fps": 29.97002997002997}, "3G6L41pZoQw": {"res": [720, 1280], "length": 2786, "fps": 30.0}, "5XFDbIBXzzw": {"res": [360, 640], "length": 5039, "fps": 30.0}, "75FxA85GrM0": {"res": [1080, 1920], "length": 12655, "fps": 30.0}, "72lOmM3mZXQ": {"res": [720, 1280], "length": 21670, "fps": 29.97002997002997}, "2zj0EooJVG4": {"res": [1080, 1920], "length": 2492, "fps": 29.97002997002997}, "17K1sRYGkCo": {"res": [1080, 1678], "length": 1874, "fps": 15.0}, "0TgSMo2M8G8": {"res": [720, 1280], "length": 5326, "fps": 23.976023976023978}, "7LCL-sC8leI": {"res": [720, 1280], "length": 1063, "fps": 29.943661971830984}, "0nWekM-XgC8": {"res": [1080, 1920], "length": 5769, "fps": 29.97002997002997}, "5m5QwN7g3xQ": {"res": [1080, 1920], "length": 10342, "fps": 
30.0}, "6SDaIOVMZqg": {"res": [720, 1280], "length": 6256, "fps": 29.97002997002997}, "2tyJZ0oBFc0": {"res": [1080, 1920], "length": 6173, "fps": 29.97002997002997}, "5ZNOtMysCI4": {"res": [720, 1280], "length": 5678, "fps": 29.97002997002997}, "3v52wUN8A70": {"res": [480, 640], "length": 4564, "fps": 29.97002997002997}, "87mnJCsI62c": {"res": [1080, 1920], "length": 11370, "fps": 30.0}, "0iJ5ac1feUo": {"res": [1080, 1920], "length": 13765, "fps": 29.97002997002997}, "6FpbWUYAxbQ": {"res": [1080, 1920], "length": 5680, "fps": 29.97002997002997}, "5Ke194FDKvw": {"res": [1080, 1920], "length": 20319, "fps": 29.97002997002997}, "6pA61ZOXdv8": {"res": [720, 1280], "length": 20835, "fps": 29.97002997002997}, "21-vLPZ2nak": {"res": [1080, 1920], "length": 3154, "fps": 30.0}, "1aIPXe8ETE0": {"res": [1080, 1920], "length": 14883, "fps": 30.0}, "1R5EsvdyODA": {"res": [1080, 1920], "length": 4500, "fps": 29.97002997002997}, "6jZqwf--CSY": {"res": [720, 1280], "length": 9837, "fps": 29.97002997002997}, "49JeXNggYDU": {"res": [480, 854], "length": 5737, "fps": 29.97002997002997}, "1bGoCYeXEoc": {"res": [720, 1280], "length": 19016, "fps": 30.0}, "7ZxGAEkLZ5w": {"res": [720, 1280], "length": 4737, "fps": 25.0}, "3Q1YafjvXw0": {"res": [720, 1280], "length": 19886, "fps": 29.97002997002997}, "71vyx6s25i4": {"res": [1080, 1920], "length": 6200, "fps": 29.97002997002997}, "5gCo9Gf8tqg": {"res": [1080, 1920], "length": 9586, "fps": 23.976023976023978}, "0J4_26aTVF8": {"res": [1080, 1920], "length": 1495, "fps": 29.853}, "0SoHPxQjFqM": {"res": [720, 1280], "length": 1487, "fps": 30.0}, "6QuKxaUXUnY": {"res": [1080, 1920], "length": 24113, "fps": 29.97002997002997}, "1OFwW1YN3Go": {"res": [720, 1280], "length": 5561, "fps": 29.97002997002997}, "2XeUE6rvKSo": {"res": [1080, 1920], "length": 10415, "fps": 23.976023976023978}, "3TEjYQa_4DU": {"res": [480, 854], "length": 5732, "fps": 29.97002997002997}, "7K5B6bzWrNI": {"res": [720, 1280], "length": 8658, "fps": 29.97002997002997}, 
"78RGXKxj03Q": {"res": [1080, 1920], "length": 2110, "fps": 29.97002997002997}, "43sM8Rod3F4": {"res": [360, 202], "length": 6413, "fps": 30.0}, "5q1DJ6ZsLIM": {"res": [1080, 1920], "length": 338, "fps": 29.97002997002997}, "2JtgxM_AbU0": {"res": [1080, 608], "length": 1823, "fps": 30.0}, "3Q9umXcc3UI": {"res": [1080, 1920], "length": 4871, "fps": 23.976023976023978}, "3zBcV4VQ2lg": {"res": [1080, 1920], "length": 14337, "fps": 29.97002997002997}, "4w67nCFypJk": {"res": [1080, 1920], "length": 628, "fps": 29.952305246422892}, "wgiYwFfcs9Y": {"res": [1080, 1920], "length": 7124, "fps": 29.97002997002997}, "6V1xAb5Nx2U": {"res": [720, 1280], "length": 2249, "fps": 29.97002997002997}, "0uW4g4KEFMA": {"res": [1080, 1920], "length": 6636, "fps": 29.97002997002997}, "64ZKS9P88Hs": {"res": [720, 1280], "length": 2801, "fps": 30.0}, "-bKqEMhE4g8": {"res": [720, 1280], "length": 3508, "fps": 29.97}, "3VnprO5BPiU": {"res": [480, 854], "length": 1373, "fps": 29.97002997002997}, "-FuhRkxZf3w": {"res": [720, 1280], "length": 17391, "fps": 16.666666666666668}, "5a3mFrRaPO0": {"res": [1080, 1920], "length": 6544, "fps": 29.97002997002997}, "7ahWYmvvPg0": {"res": [720, 1280], "length": 798, "fps": 29.97002997002997}, "5GKJH3kAWDo": {"res": [480, 848], "length": 4012, "fps": 29.943}, "27h0DG012oo": {"res": [720, 1280], "length": 5068, "fps": 29.97002997002997}, "1Aa4aJ76A4Q": {"res": [1080, 1920], "length": 1736, "fps": 29.97002997002997}, "3vMmICCkT5M": {"res": [1080, 608], "length": 7387, "fps": 29.0}, "3_kaSHhZja0": {"res": [720, 1080], "length": 4215, "fps": 17.159144893111637}, "0KtceAKU6W0": {"res": [720, 1280], "length": 2303, "fps": 29.97002997002997}, "-2eTHYdyhCY": {"res": [1080, 1920], "length": 6234, "fps": 29.97002997002997}, "5CBKiYdRDKg": {"res": [720, 1280], "length": 2138, "fps": 29.97002997002997}, "0qRV0ujlWSc": {"res": [1080, 1920], "length": 1986, "fps": 29.97002997002997}, "6rUNdP7fKmM": {"res": [720, 1280], "length": 656, "fps": 23.976023976023978}, 
"-Q3RVYAZlRA": {"res": [1080, 1920], "length": 2030, "fps": 29.97002997002997}, "3-kN_02oiTs": {"res": [1080, 1920], "length": 5463, "fps": 23.976023976023978}, "0Eh7r1WiwIk": {"res": [1080, 1920], "length": 19723, "fps": 24.0}, "2p4co-iv4gw": {"res": [1080, 1920], "length": 8433, "fps": 23.976023976023978}, "4oi9AzzjO7s": {"res": [720, 1280], "length": 6200, "fps": 30.0}, "5cZnqZFZ7Ho": {"res": [720, 1280], "length": 12196, "fps": 29.97002997002997}, "89GMXufKDOI": {"res": [720, 1280], "length": 2947, "fps": 29.97}, "6kEtXRo9i9w": {"res": [720, 1280], "length": 1871, "fps": 29.97}, "bzNKQ2FkEJI": {"res": [720, 1280], "length": 38378, "fps": 23.976023976023978}, "5aMMG0d-Qfg": {"res": [720, 1280], "length": 3526, "fps": 29.97002997002997}, "3HCloEJpk08": {"res": [1080, 1920], "length": 7949, "fps": 29.97002997002997}, "1W05nmX9T38": {"res": [1080, 1920], "length": 8363, "fps": 25.0}, "616c90GXGXo": {"res": [720, 1280], "length": 5391, "fps": 30.0}, "30mxrl7Tetc": {"res": [1080, 1920], "length": 7153, "fps": 29.97002997002997}, "6kBF9kpK6FA": {"res": [1080, 1920], "length": 9873, "fps": 29.97002997002997}, "348qudHxncw": {"res": [720, 1280], "length": 5288, "fps": 29.97002997002997}, "6FXrKHL9HJE": {"res": [1080, 1920], "length": 8127, "fps": 24.0}, "7aCix71Y-L4": {"res": [720, 1280], "length": 2042, "fps": 25.0}, "4CZCJPhojUc": {"res": [1080, 1920], "length": 22950, "fps": 23.976023976023978}, "Kcr3v9H_rkI": {"res": [676, 1280], "length": 5458, "fps": 29.97}, "41NjBCt1OKM": {"res": [1080, 1920], "length": 7338, "fps": 29.97002997002997}, "5sXEQQsl470": {"res": [480, 640], "length": 6323, "fps": 24.0}, "14diAi-BS40": {"res": [1080, 1920], "length": 31166, "fps": 23.976023976023978}, "2C__aKaFRlE": {"res": [1080, 1920], "length": 5611, "fps": 23.976023976023978}, "0Klp2jioXa0": {"res": [720, 1280], "length": 5882, "fps": 24.078}, "2STv7x4XU5g": {"res": [1080, 1920], "length": 7013, "fps": 29.97002997002997}, "5eWlGWCPPKE": {"res": [1080, 1920], "length": 16352, 
"fps": 24.0}, "1kNszi0e9XU": {"res": [720, 1280], "length": 10619, "fps": 29.97002997002997}, "4hrVz2SxOBI": {"res": [480, 854], "length": 8002, "fps": 29.97002997002997}, "-UpzTBtlVcw": {"res": [1080, 608], "length": 540, "fps": 29.97002997002997}, "2M7O1eSiIhg": {"res": [720, 1280], "length": 3372, "fps": 29.97002997002997}, "5D_0BQ8VvJI": {"res": [1080, 1920], "length": 3524, "fps": 23.976023976023978}, "6F6rQFlMEEQ": {"res": [720, 1280], "length": 4229, "fps": 29.97002997002997}, "7TFuYF-v3tk": {"res": [720, 1280], "length": 4514, "fps": 30.0}, "1hKx1BL1vVE": {"res": [1080, 1920], "length": 1772, "fps": 30.0}, "5s6cdVbujI8": {"res": [1080, 1920], "length": 18114, "fps": 24.0}, "3Pe2tVHkHvg": {"res": [720, 1280], "length": 16094, "fps": 29.97002997002997}, "2FqUerxjxXY": {"res": [692, 1280], "length": 1850, "fps": 30.0}, "5o5G3j2wEuM": {"res": [720, 1152], "length": 23335, "fps": 30.0}, "7_7CfrNR2dA": {"res": [720, 1280], "length": 6155, "fps": 29.97002997002997}, "5f4Cmp-B-6E": {"res": [1080, 1920], "length": 22534, "fps": 29.97002997002997}, "3Yp1vQwpBUs": {"res": [1080, 1920], "length": 5881, "fps": 23.976023976023978}, "7F4kSiCL-Hw": {"res": [1080, 1920], "length": 534, "fps": 29.97002997002997}, "1Q9Mo892UTU": {"res": [1080, 1920], "length": 32380, "fps": 23.976023976023978}, "61abgYPeeA4": {"res": [1080, 1920], "length": 589, "fps": 23.976023976023978}, "2-0BoNb-3DE": {"res": [1080, 1920], "length": 16438, "fps": 29.97002997002997}, "76XaFz1ysIU": {"res": [360, 640], "length": 2583, "fps": 29.97002997002997}, "1fCuZxuGHGM": {"res": [1080, 1920], "length": 10342, "fps": 29.97002997002997}, "2ctCiA0emHo": {"res": [720, 1280], "length": 18206, "fps": 29.97002997002997}, "7qDsKIM5nOk": {"res": [1080, 1920], "length": 3425, "fps": 29.721}, "5k7YZO5nR0U": {"res": [720, 1280], "length": 4409, "fps": 29.97002997002997}, "2AENol2KoQ0": {"res": [1080, 1920], "length": 15498, "fps": 29.97002997002997}, "7CP3dGmaiwA": {"res": [480, 848], "length": 2445, "fps": 30.0}, 
"27sFWKqaXI4": {"res": [720, 1920], "length": 132157, "fps": 25.0}, "0xLXcCbWtyY": {"res": [720, 1280], "length": 6981, "fps": 29.97002997002997}, "2D3EdAi-hYM": {"res": [1080, 1920], "length": 4420, "fps": 23.976023976023978}, "6GnSzk_WHWo": {"res": [1080, 1920], "length": 17907, "fps": 29.97002997002997}, "4Fo8DhZ4b1I": {"res": [1080, 1920], "length": 5317, "fps": 23.976023976023978}, "164Zlx6gOFA": {"res": [1080, 1920], "length": 4968, "fps": 30.0}, "5bbFWqaeQwk": {"res": [1080, 1920], "length": 5610, "fps": 29.97002997002997}, "6fSoOlpPKt8": {"res": [720, 1280], "length": 4753, "fps": 25.0}, "4K4OjRaKu9Y": {"res": [1080, 1920], "length": 12624, "fps": 25.0}, "-7AQlPgDPN0": {"res": [1080, 1920], "length": 17750, "fps": 29.97002997002997}, "5PL2c0jpLsE": {"res": [720, 1280], "length": 9363, "fps": 29.97002997002997}, "7CyI8OaIPEg": {"res": [1080, 1920], "length": 11499, "fps": 23.976023976023978}, "2zr9Zg832CI": {"res": [720, 1280], "length": 3193, "fps": 29.97002997002997}, "1xzjrdPv3h0": {"res": [720, 1280], "length": 12073, "fps": 29.97002997002997}, "5MzVQwQlnI0": {"res": [720, 1280], "length": 8536, "fps": 29.97002997002997}, "1wWh42fayfg": {"res": [1080, 1920], "length": 39967, "fps": 29.97002997002997}, "-XgLtisTWW0": {"res": [1080, 1920], "length": 5497, "fps": 29.97002997002997}, "4Sp_9wH8hLs": {"res": [1080, 1920], "length": 15008, "fps": 29.97002997002997}, "2HCnifZR1pw": {"res": [720, 1280], "length": 19929, "fps": 30.0}, "31426ICxTW8": {"res": [1080, 1920], "length": 2827, "fps": 29.97002997002997}, "215r9CyOX-o": {"res": [360, 640], "length": 9862, "fps": 30.0}, "2Lzdxq-ToJk": {"res": [1080, 1920], "length": 3612, "fps": 29.97002997002997}, "6y8pAvRbl1s": {"res": [1080, 1920], "length": 16156, "fps": 29.97002997002997}, "5JIZ561YOV0": {"res": [720, 1280], "length": 7843, "fps": 29.97002997002997}, "6_99QnfuVms": {"res": [720, 1280], "length": 5873, "fps": 29.97002997002997}, "1Lt_v3pYtw8": {"res": [360, 640], "length": 4182, "fps": 
29.97002997002997}, "3S1ms3hJo0Y": {"res": [720, 1280], "length": 3673, "fps": 24.0}, "56GYhfEs8QM": {"res": [720, 1280], "length": 2737, "fps": 29.97}, "AE1q9TxgLhY": {"res": [720, 1280], "length": 17610, "fps": 30.0}, "6-RRTuj0pLg": {"res": [720, 1080], "length": 3223, "fps": 15.0}, "Hv8sflBRMOU": {"res": [654, 1278], "length": 5510, "fps": 30.0}, "5XrevyCC3fY": {"res": [720, 1080], "length": 25101, "fps": 30.0}, "1v1OBDb4JRs": {"res": [1080, 1920], "length": 3398, "fps": 29.97002997002997}, "1-Y2MUXSfik": {"res": [720, 1280], "length": 3287, "fps": 29.97002997002997}, "3cBm0uHL8Ig": {"res": [720, 1280], "length": 2154, "fps": 30.0}, "3ss_OZvCQLI": {"res": [1080, 1920], "length": 8634, "fps": 29.97002997002997}, "4uEoFmApOsw": {"res": [720, 1080], "length": 839, "fps": 30.0}, "0bw1VchkoSo": {"res": [470, 854], "length": 1893, "fps": 29.97002997002997}, "0dMMGRq3EaI": {"res": [480, 640], "length": 11081, "fps": 30.0}, "25mGwxZhCAQ": {"res": [720, 1280], "length": 5282, "fps": 29.97002997002997}, "0tjM8lemVKI": {"res": [1080, 1920], "length": 11544, "fps": 30.0}, "3jdiiCmAcpU": {"res": [1080, 1920], "length": 12302, "fps": 29.97002997002997}, "6UBndyjRUKU": {"res": [360, 640], "length": 11239, "fps": 29.97}, "3h7veASAUaw": {"res": [720, 1280], "length": 23690, "fps": 29.97002997002997}, "2ChVL6Ji4Ns": {"res": [720, 1280], "length": 4232, "fps": 24.0}, "5mvbzMFRcIY": {"res": [1080, 1920], "length": 14818, "fps": 29.97002997002997}, "75TJp8fFCeg": {"res": [720, 1280], "length": 6853, "fps": 30.0}, "2ahpTXhHcLc": {"res": [470, 854], "length": 5795, "fps": 29.97002997002997}, "6czb8zhu-1g": {"res": [1080, 1920], "length": 17030, "fps": 29.97002997002997}, "1oKcvVXNKbk": {"res": [1080, 1920], "length": 2585, "fps": 30.0}, "2D78-5p3s5E": {"res": [480, 854], "length": 2219, "fps": 29.97002997002997}, "1_KY3LF4SGE": {"res": [720, 1280], "length": 3272, "fps": 29.97002997002997}, "7HPzuROoVnk": {"res": [720, 1280], "length": 2358, "fps": 23.976023976023978}, "6bSXQj7PRjc": 
{"res": [1080, 1920], "length": 43331, "fps": 29.97002997002997}, "54YZ2zfYJvw": {"res": [1080, 1920], "length": 282, "fps": 29.97002997002997}, "802_VdCJHsU": {"res": [1080, 1920], "length": 17344, "fps": 30.0}, "4GnFufO-8Yc": {"res": [1080, 1920], "length": 9315, "fps": 29.97002997002997}, "4h4RTnvEMq8": {"res": [720, 1280], "length": 2522, "fps": 24.0}, "5T9JlkEJigM": {"res": [720, 1280], "length": 6106, "fps": 29.97002997002997}, "2UfduP22NdM": {"res": [1080, 1920], "length": 3395, "fps": 29.97002997002997}, "7BgQM7l2ZY0": {"res": [1080, 1920], "length": 13556, "fps": 29.97002997002997}, "-SZ-Qeo0-20": {"res": [360, 640], "length": 367, "fps": 29.97002997002997}, "3i2DRkx2q-0": {"res": [720, 1280], "length": 7889, "fps": 29.97002997002997}, "7l2FlmcIlpo": {"res": [720, 1280], "length": 12294, "fps": 30.0}, "4gAMK2wL7UY": {"res": [1080, 1920], "length": 17680, "fps": 30.0}, "2izKV5FbIuc": {"res": [1080, 1920], "length": 24906, "fps": 29.97002997002997}, "2KlWfbJNWUE": {"res": [1080, 1920], "length": 4855, "fps": 30.0}, "0I7h9AmcusY": {"res": [1080, 1920], "length": 911, "fps": 30.0}, "5pPNasfZk-o": {"res": [720, 1280], "length": 5232, "fps": 30.0}, "22lGgQDiwII": {"res": [720, 1280], "length": 8527, "fps": 29.97002997002997}, "7yYu5s4k8Ig": {"res": [1080, 1920], "length": 20118, "fps": 29.97002997002997}, "3POeu5of0xk": {"res": [720, 1280], "length": 1972, "fps": 29.97}, "5pWfew_lLx8": {"res": [720, 1280], "length": 2509, "fps": 15.083333333333334}, "5b7oKRowCnQ": {"res": [720, 1280], "length": 4393, "fps": 30.0}, "4avr8Dc7i6I": {"res": [720, 1280], "length": 1623, "fps": 29.97002997002997}, "5p2ZOmY542c": {"res": [720, 1280], "length": 10734, "fps": 30.0}, "2Je2fPkKkpQ": {"res": [1080, 1920], "length": 1080, "fps": 23.976023976023978}, "5BBhXDRWQzc": {"res": [720, 1280], "length": 1021, "fps": 29.97}, "5juUzT3gD0Y": {"res": [1080, 1920], "length": 698, "fps": 29.97002997002997}, "-Olb6_XFpEU": {"res": [360, 640], "length": 3630, "fps": 29.97002997002997}, 
"84iu7THCu_A": {"res": [1080, 1920], "length": 5839, "fps": 23.976023976023978}, "1ulJIZ7pNrs": {"res": [480, 854], "length": 9635, "fps": 29.97002997002997}, "521NvKgUsQE": {"res": [480, 720], "length": 613, "fps": 29.97002997002997}, "3-8fEGeDfK4": {"res": [1080, 1920], "length": 1676, "fps": 29.97002997002997}, "16qygSE343A": {"res": [1080, 1920], "length": 2850, "fps": 30.0}, "3DiLW8AIo6k": {"res": [1080, 1920], "length": 10831, "fps": 23.976023976023978}, "6bMuWpcOvNY": {"res": [720, 1280], "length": 13178, "fps": 29.97002997002997}, "4sV2VXAf9lQ": {"res": [1080, 1920], "length": 8667, "fps": 23.976023976023978}, "7ZPnOwluOrw": {"res": [1080, 1920], "length": 2462, "fps": 29.97002997002997}, "16P8uEqSJyU": {"res": [720, 1280], "length": 6624, "fps": 29.97002997002997}, "1wZ_kWRjc-E": {"res": [1080, 1920], "length": 11741, "fps": 23.976023976023978}, "0Le-U7KC0Vo": {"res": [1080, 1920], "length": 14230, "fps": 29.97002997002997}, "0KcUZlotmlo": {"res": [1080, 1920], "length": 23829, "fps": 30.0}, "3TWp77pF_F8": {"res": [1080, 1080], "length": 4491, "fps": 29.97002997002997}, "1v-MqrbC5Js": {"res": [720, 1280], "length": 6299, "fps": 29.97002997002997}, "4kp2CBOi69c": {"res": [720, 1280], "length": 1444, "fps": 30.0}, "0ZItRubWbrU": {"res": [720, 1280], "length": 3866, "fps": 29.97002997002997}, "4KlZehjW8KQ": {"res": [454, 854], "length": 365, "fps": 29.25}, "3wBZOQfpXWY": {"res": [720, 1280], "length": 923, "fps": 29.97002997002997}, "3QMcGx702tA": {"res": [480, 640], "length": 1230, "fps": 23.976023976023978}, "4nH6b8btqso": {"res": [720, 1280], "length": 7463, "fps": 29.97002997002997}, "5cS9H4HgBgk": {"res": [480, 640], "length": 2554, "fps": 30.0}, "-JkA7SCF_lk": {"res": [718, 1280], "length": 12743, "fps": 29.88}, "-MZpBWGx3ok": {"res": [480, 640], "length": 2669, "fps": 30.0}, "1rTMbyap5OU": {"res": [1080, 1920], "length": 22702, "fps": 30.0}, "1XIEO67GJ_w": {"res": [720, 1280], "length": 11290, "fps": 29.97002997002997}, "3jskB4GJJj0": {"res": [1080, 
1920], "length": 4274, "fps": 29.97002997002997}, "2uzEPSqKD5U": {"res": [1080, 1920], "length": 1284, "fps": 29.97002997002997}, "-CAKTfoVP18": {"res": [720, 1280], "length": 7942, "fps": 30.0}, "5gJERR3pYfQ": {"res": [480, 854], "length": 10160, "fps": 29.97002997002997}, "2GGsKLviHSA": {"res": [480, 640], "length": 5675, "fps": 29.97002997002997}, "2h_DPMZ9mAo": {"res": [1080, 1920], "length": 30740, "fps": 30.0}, "4l4P3f3mq_s": {"res": [1080, 1920], "length": 1405, "fps": 29.97002997002997}, "1utumpy3Aec": {"res": [480, 854], "length": 3683, "fps": 29.97002997002997}, "2eBL3Qmo6GY": {"res": [1080, 1920], "length": 4705, "fps": 29.97002997002997}, "7xpeNMA_H30": {"res": [1080, 1920], "length": 13484, "fps": 29.97002997002997}, "61q-PJr_UvM": {"res": [720, 1280], "length": 4739, "fps": 29.97002997002997}, "3GbelPeycv0": {"res": [720, 1280], "length": 12792, "fps": 29.97}, "2FsSaOFuIWU": {"res": [720, 1280], "length": 1600, "fps": 30.0}, "5BiUaekR8AA": {"res": [720, 1280], "length": 1353, "fps": 29.97002997002997}, "1MW4skTG0m4": {"res": [720, 1280], "length": 10918, "fps": 30.0}, "4gEtIi8nI2Y": {"res": [1080, 1920], "length": 46844, "fps": 29.949}, "5MUTjks5Dac": {"res": [1080, 1920], "length": 15901, "fps": 29.97002997002997}, "3fvx6ttKFAY": {"res": [720, 1280], "length": 4065, "fps": 30.0}, "3xN-D-Kp47o": {"res": [1080, 1920], "length": 25956, "fps": 29.97002997002997}, "-44hXvy7VZk": {"res": [720, 1280], "length": 90241, "fps": 25.0}, "1hI4oN8u7-4": {"res": [720, 1280], "length": 3957, "fps": 29.97002997002997}, "3MZ_9uG5Rcw": {"res": [480, 640], "length": 6435, "fps": 29.97002997002997}, "6rf3bfocwRE": {"res": [1080, 1920], "length": 4701, "fps": 24.0}, "7MO4gEUj6v0": {"res": [1080, 1920], "length": 23474, "fps": 24.0}, "7yV8wwDSGbk": {"res": [1080, 1920], "length": 2403, "fps": 30.0}, "6ZT9A2g4a6g": {"res": [1080, 1920], "length": 5602, "fps": 29.97002997002997}, "1R90waOdJeM": {"res": [720, 1280], "length": 22301, "fps": 29.97002997002997}, "3V24F9lvHKU": 
{"res": [720, 1280], "length": 6119, "fps": 29.97002997002997}, "4aOm3KWVZFY": {"res": [720, 1280], "length": 3176, "fps": 29.97002997002997}, "7h1PTqeyiAo": {"res": [1080, 1920], "length": 770, "fps": 29.97002997002997}, "6hU8lU9L2vo": {"res": [1080, 1920], "length": 4693, "fps": 23.976023976023978}, "3eZ1OPXGmNc": {"res": [720, 1280], "length": 6742, "fps": 30.0}, "0yKm_dLloIY": {"res": [1080, 1920], "length": 809, "fps": 23.976023976023978}, "3IzgQqiETPE": {"res": [1080, 1920], "length": 3953, "fps": 29.97002997002997}, "4vwMYzUXyQs": {"res": [1080, 1920], "length": 7386, "fps": 29.97002997002997}, "4JYInRUHjlY": {"res": [1080, 1920], "length": 15409, "fps": 29.97002997002997}, "6cF1wysTBio": {"res": [1080, 1920], "length": 18120, "fps": 29.97002997002997}, "49zNilccJiQ": {"res": [720, 1280], "length": 2553, "fps": 30.0}, "42iSN6RHLoc": {"res": [1080, 1920], "length": 10547, "fps": 29.97002997002997}, "5J8a2CaXWvk": {"res": [1080, 1920], "length": 13850, "fps": 29.97002997002997}, "569YfF-8DsY": {"res": [1080, 1920], "length": 8302, "fps": 29.97002997002997}, "7zzaJ0aKSZU": {"res": [480, 654], "length": 5510, "fps": 23.976023976023978}, "6uJgNJ1xFZg": {"res": [1080, 1920], "length": 11726, "fps": 29.97002997002997}, "2cryaHnc62w": {"res": [480, 854], "length": 2377, "fps": 29.97002997002997}, "6p4UGEPLITk": {"res": [480, 720], "length": 4291, "fps": 30.0}, "7EZtN3yd5_s": {"res": [720, 1280], "length": 3645, "fps": 24.0}, "6C6F4_RKUpE": {"res": [720, 1280], "length": 8198, "fps": 29.97002997002997}, "-JPXg91kzmw": {"res": [1080, 1920], "length": 18437, "fps": 29.97002997002997}, "6CT3qbBqpus": {"res": [720, 1280], "length": 3786, "fps": 30.0}, "4jp6_7JGdW4": {"res": [1080, 1920], "length": 15895, "fps": 23.976023976023978}, "3SgJHumh3f4": {"res": [720, 1280], "length": 8663, "fps": 30.0}, "1euejXNmJg0": {"res": [1080, 1920], "length": 3957, "fps": 29.97002997002997}, "4zsgy6DmJfA": {"res": [1080, 1920], "length": 18834, "fps": 30.0}, "-9yg0HFOW60": {"res": [480, 
640], "length": 12151, "fps": 30.0}, "4uT5dsTN4sk": {"res": [720, 1280], "length": 11825, "fps": 29.97002997002997}, "3u8jQzzqmA4": {"res": [1080, 1920], "length": 13155, "fps": 30.0}, "70ddmjZLtJw": {"res": [1080, 1920], "length": 14272, "fps": 29.97002997002997}, "6JhZ8dKoemk": {"res": [1080, 1920], "length": 10480, "fps": 29.97002997002997}, "2gW2DsesmvY": {"res": [720, 1280], "length": 2089, "fps": 29.97002997002997}, "5wRIW4CfphM": {"res": [720, 1280], "length": 2015, "fps": 30.0}, "7KODZvoOFlg": {"res": [454, 854], "length": 514, "fps": 29.424}, "-O61XJs7z1o": {"res": [720, 1280], "length": 2459, "fps": 29.97002997002997}, "3Zqd1BSmx9g": {"res": [1080, 1920], "length": 1792, "fps": 30.0}, "4_833Ptadbw": {"res": [720, 1280], "length": 7510, "fps": 29.97002997002997}, "6f-VDi9dq3g": {"res": [1080, 1920], "length": 12626, "fps": 23.976023976023978}, "42x6FbDy8I0": {"res": [720, 1080], "length": 2248, "fps": 29.97002997002997}, "5Dd8UZUEnvw": {"res": [720, 1280], "length": 2224, "fps": 29.97}, "7T89OwdCd3E": {"res": [720, 1280], "length": 6301, "fps": 29.97002997002997}, "3K0E6OLTD_Y": {"res": [480, 640], "length": 4648, "fps": 25.025}, "4b67wDgYQvQ": {"res": [1080, 1920], "length": 3400, "fps": 29.97002997002997}, "36BGPzlIPg8": {"res": [720, 1280], "length": 3036, "fps": 29.97002997002997}, "-OF1EMDNhPE": {"res": [720, 1280], "length": 13468, "fps": 29.97002997002997}, "0i8aRg4jzfU": {"res": [1080, 1920], "length": 10245, "fps": 23.976023976023978}, "2hgQAzs0FPc": {"res": [720, 1280], "length": 5312, "fps": 29.97002997002997}, "_-ZZ7nuFTkE": {"res": [720, 1280], "length": 10965, "fps": 29.97002997002997}, "0lOuyC7l9OY": {"res": [1080, 1920], "length": 3465, "fps": 29.97002997002997}, "4GAUlwpbYXI": {"res": [1080, 1920], "length": 3779, "fps": 29.97002997002997}, "0xmSyGtz2Rc": {"res": [1080, 1920], "length": 17395, "fps": 23.976023976023978}, "49qZMa3FZmU": {"res": [1080, 1920], "length": 2460, "fps": 29.97002997002997}, "-4ew_GPNCpk": {"res": [720, 1280], 
"length": 17745, "fps": 24.0}, "7b3-gxp2XrM": {"res": [1080, 1920], "length": 9057, "fps": 23.976023976023978}, "-TI-YrX4Zho": {"res": [1080, 1920], "length": 16846, "fps": 29.97002997002997}, "5803kv4-ug8": {"res": [1080, 1920], "length": 5377, "fps": 23.976023976023978}, "67Oh2y4te2Q": {"res": [720, 1280], "length": 6421, "fps": 29.97}, "-aUYUqPDLkI": {"res": [1080, 1920], "length": 22815, "fps": 24.0}, "6ro7A-XZ2s4": {"res": [1080, 1920], "length": 6868, "fps": 24.0}, "4KIrVLYp9Y0": {"res": [480, 640], "length": 23142, "fps": 24.975024975024976}, "6jAXjIR_ES8": {"res": [1080, 1920], "length": 1153, "fps": 29.97002997002997}, "7bt8cAQrMiI": {"res": [480, 654], "length": 8505, "fps": 23.976023976023978}, "6Vb4EebEJBc": {"res": [1080, 1920], "length": 10533, "fps": 29.97002997002997}, "3KRDQzFPSwc": {"res": [1080, 1920], "length": 15613, "fps": 23.976023976023978}, "2RjGna641y4": {"res": [720, 1280], "length": 12589, "fps": 29.97}, "3vBArm5y3EM": {"res": [480, 640], "length": 3199, "fps": 30.0}, "4ty9rIXKhqw": {"res": [720, 1280], "length": 2200, "fps": 30.0}, "841bomg5250": {"res": [1080, 1920], "length": 1275, "fps": 29.97002997002997}, "4cLQPQI7-yE": {"res": [720, 1280], "length": 939, "fps": 29.97002997002997}, "5yqt_8Ra2H0": {"res": [1080, 1920], "length": 2581, "fps": 30.0}, "81yvX4P3AsU": {"res": [1080, 1920], "length": 8552, "fps": 23.976023976023978}, "5QokwWvvPIc": {"res": [1080, 1920], "length": 13420, "fps": 25.0}, "6KigrppBbT8": {"res": [720, 1280], "length": 3590, "fps": 29.97002997002997}, "-DYr4znK9NI": {"res": [1080, 1920], "length": 6443, "fps": 29.97002997002997}, "wAjN1qGpMdc": {"res": [720, 1280], "length": 23294, "fps": 29.97}, "6JWJd38nW_0": {"res": [720, 1280], "length": 2711, "fps": 25.0}, "3kaUiON2fS8": {"res": [1080, 1920], "length": 9763, "fps": 29.97002997002997}, "0bodeyCThJM": {"res": [1080, 1920], "length": 23094, "fps": 23.976023976023978}, "-_oUXqM2Zjc": {"res": [720, 1280], "length": 23378, "fps": 30.0}, "7v19XbHXVmk": {"res": 
[480, 854], "length": 2038, "fps": 30.0}, "19UigxUqGnE": {"res": [720, 1280], "length": 13859, "fps": 29.97002997002997}, "2mSvSSCM-2Q": {"res": [720, 1280], "length": 1008, "fps": 19.944}, "2vqBVu88UBc": {"res": [1080, 608], "length": 448, "fps": 29.97002997002997}, "0nQndrjqrUs": {"res": [1080, 1920], "length": 4450, "fps": 30.0}, "2vuhQrSToBU": {"res": [720, 1280], "length": 2892, "fps": 30.0}, "4NArR7wSXzQ": {"res": [720, 1280], "length": 1458, "fps": 23.976023976023978}, "58_nNifjxUI": {"res": [360, 640], "length": 3878, "fps": 29.97002997002997}, "7CLG9cN8ujs": {"res": [720, 1280], "length": 6959, "fps": 29.97002997002997}, "4hmUnw6t5gk": {"res": [480, 854], "length": 17208, "fps": 29.97002997002997}, "58IrPeKRGXU": {"res": [480, 640], "length": 2253, "fps": 25.0}, "4FPGg6blSmg": {"res": [720, 1280], "length": 5100, "fps": 30.0}, "4lFd8WGhyNU": {"res": [1080, 1920], "length": 4687, "fps": 30.0}, "0qyDIvXF8eM": {"res": [360, 640], "length": 12098, "fps": 29.97}, "2e-Wwqiv_5U": {"res": [720, 1280], "length": 766, "fps": 30.0}, "4iDvV-RDVcw": {"res": [720, 1280], "length": 23494, "fps": 25.0}, "0TJNkt3LsBw": {"res": [720, 1280], "length": 16949, "fps": 29.97002997002997}, "3OGZPOmBSR8": {"res": [720, 1280], "length": 4107, "fps": 29.97002997002997}, "6Hsl3AEviTk": {"res": [360, 634], "length": 5346, "fps": 30.0}, "2rqc4PrnxPs": {"res": [1080, 1920], "length": 3630, "fps": 29.97002997002997}, "3cAagvPJiqg": {"res": [1080, 1920], "length": 3144, "fps": 29.97002997002997}, "3vlFMrE2hzo": {"res": [360, 640], "length": 3809, "fps": 30.0}, "DI9mCITMu_k": {"res": [720, 1280], "length": 7323, "fps": 30.0}, "5IOv9y4XxzQ": {"res": [720, 1280], "length": 4898, "fps": 29.97002997002997}, "2wNw5eBurAo": {"res": [720, 1280], "length": 3812, "fps": 29.97002997002997}, "209YWxzAzyQ": {"res": [720, 1280], "length": 10879, "fps": 29.97002997002997}, "1Vl-bI7Oa6c": {"res": [720, 1280], "length": 2816, "fps": 30.0}, "0b3mzxEFxX4": {"res": [1080, 1920], "length": 4271, "fps": 30.0}, 
"37pUa38ARcc": {"res": [1080, 1920], "length": 1331, "fps": 29.0}, "2msG66_yfBw": {"res": [1080, 1920], "length": 19695, "fps": 23.976023976023978}, "MWQO-BudhrY": {"res": [720, 1280], "length": 6211, "fps": 25.0}, "2wUQUwnxxtE": {"res": [720, 1280], "length": 9486, "fps": 29.97002997002997}, "2Uc3iU6olf0": {"res": [720, 1280], "length": 4851, "fps": 30.0}, "6L61Z9vtr4Y": {"res": [1080, 1920], "length": 7754, "fps": 29.97002997002997}, "7zCjd4tPygo": {"res": [1080, 1920], "length": 514, "fps": 23.976023976023978}, "7Uwp7VIlH4k": {"res": [1080, 1920], "length": 7981, "fps": 30.0}, "YrHpeEwk_-U": {"res": [720, 1280], "length": 18699, "fps": 23.976023976023978}, "7IONUnzZCzk": {"res": [1080, 1920], "length": 14610, "fps": 29.97002997002997}, "86yIhX3BwlU": {"res": [540, 1280], "length": 7576, "fps": 23.976023976023978}, "7cEtmTiPyyk": {"res": [1080, 1920], "length": 35018, "fps": 29.97002997002997}, "4zPLsd-C7cI": {"res": [720, 1280], "length": 2116, "fps": 25.0}, "3v0A16nY1P8": {"res": [1080, 1920], "length": 5918, "fps": 23.976023976023978}, "3SXfajMCua4": {"res": [1080, 1920], "length": 12506, "fps": 23.976023976023978}, "4MpA5tl6uCE": {"res": [1080, 1920], "length": 8868, "fps": 30.0}, "19Knj6oOlSw": {"res": [1080, 608], "length": 869, "fps": 24.0}, "2j1mkNsMyUw": {"res": [1080, 1920], "length": 1129, "fps": 30.0}, "4t8h-sfObpQ": {"res": [1080, 1920], "length": 1316, "fps": 23.976023976023978}, "0eHVehWTtWI": {"res": [1080, 1920], "length": 9332, "fps": 29.97002997002997}, "3ETroZuuwEc": {"res": [480, 654], "length": 3564, "fps": 23.976023976023978}, "3rviqR9755I": {"res": [720, 1280], "length": 2698, "fps": 30.0}, "2uaK3vaJTUA": {"res": [720, 1280], "length": 8731, "fps": 29.97002997002997}, "4uvWca2M2hg": {"res": [1080, 1920], "length": 2403, "fps": 27.898608349900595}, "1L560qaV9aM": {"res": [1080, 608], "length": 415, "fps": 30.0}, "-Z8qLAdmAwo": {"res": [1080, 1920], "length": 32913, "fps": 23.976023976023978}, "zKYPawvcTS4": {"res": [720, 1280], "length": 
14365, "fps": 24.0}, "84uhxIHufcY": {"res": [1080, 1920], "length": 12415, "fps": 23.976023976023978}, "6fxABFpi6tw": {"res": [480, 854], "length": 6725, "fps": 29.97002997002997}, "3TdueBtxYRw": {"res": [720, 1280], "length": 3284, "fps": 29.97002997002997}, "6q74MasV6o4": {"res": [1080, 1920], "length": 15101, "fps": 29.97002997002997}, "16AKloM5AiA": {"res": [1080, 1920], "length": 19196, "fps": 24.0}, "7gjfXcm1OVU": {"res": [720, 1280], "length": 13017, "fps": 29.97002997002997}, "3P7VUwkHAtg": {"res": [720, 1280], "length": 568, "fps": 29.97002997002997}, "4gAd3p_Jgq8": {"res": [1080, 1920], "length": 320, "fps": 29.97002997002997}, "6FfnSGBp1AE": {"res": [720, 1280], "length": 2057, "fps": 30.0}, "0fsBkodUEk0": {"res": [1080, 1920], "length": 19441, "fps": 23.976023976023978}, "1KPnGcslx_c": {"res": [720, 1280], "length": 477, "fps": 29.0}, "5s6eouq32Pg": {"res": [1080, 1920], "length": 3670, "fps": 29.97002997002997}, "57tn1i1Nvd0": {"res": [720, 1280], "length": 17645, "fps": 25.0}, "-SHX0FO_hhk": {"res": [1080, 1920], "length": 21501, "fps": 29.97002997002997}, "4w_vVWsljVw": {"res": [1080, 1920], "length": 1545, "fps": 29.97002997002997}, "3t9CEeAW8p4": {"res": [1080, 1920], "length": 1809, "fps": 29.97002997002997}, "5x68QW9O68o": {"res": [720, 1280], "length": 24508, "fps": 29.97002997002997}, "0X3l7sFL-WY": {"res": [720, 1280], "length": 4613, "fps": 23.976023976023978}, "4Y1TNFRw3Nc": {"res": [720, 1280], "length": 2643, "fps": 29.97002997002997}, "7Hnkki8gAjI": {"res": [1080, 1920], "length": 3643, "fps": 29.97002997002997}, "0qHkmp3KNPk": {"res": [720, 1280], "length": 1495, "fps": 30.0}, "58pfZuiBfxs": {"res": [1080, 1920], "length": 13253, "fps": 24.0}, "0L6S0b14ZmI": {"res": [640, 1280], "length": 1806, "fps": 30.0}, "4EvCg5TAe2g": {"res": [1080, 1920], "length": 16746, "fps": 29.97002997002997}, "3Z1LXM7vDlY": {"res": [1080, 1920], "length": 10954, "fps": 23.976023976023978}, "36NRBR7DarU": {"res": [720, 1280], "length": 2926, "fps": 25.0}, 
"1wQ44ypM1k8": {"res": [1080, 1920], "length": 18169, "fps": 29.97002997002997}, "48TmQtTGQGQ": {"res": [1080, 1920], "length": 9085, "fps": 23.976023976023978}, "5w4B1Lxchfk": {"res": [1080, 1920], "length": 618, "fps": 29.97002997002997}, "51HD1pS4lE0": {"res": [454, 854], "length": 1676, "fps": 29.915}, "2FvaJMgoK6Q": {"res": [1080, 1920], "length": 22982, "fps": 30.0}, "0S8dgbKhxpA": {"res": [720, 1280], "length": 313, "fps": 29.97002997002997}, "4K0aO5Ig4wc": {"res": [1080, 1920], "length": 20832, "fps": 29.97002997002997}, "-LNuGj27Xyk": {"res": [480, 654], "length": 9258, "fps": 29.97002997002997}, "look_and_see": {"res": [1080, 1920], "length": 4833, "fps": 29.97002997002997}, "4Yn0XjF070U": {"res": [480, 640], "length": 17490, "fps": 29.97002997002997}, "4Qp8VKqmNu4": {"res": [1080, 1920], "length": 5815, "fps": 29.97002997002997}, "0SDfBBjRwQk": {"res": [480, 640], "length": 1147, "fps": 30.0}, "0pQSjlMw914": {"res": [360, 640], "length": 9632, "fps": 29.97002997002997}, "7L9_Il0YGXs": {"res": [1080, 1920], "length": 25216, "fps": 29.97002997002997}, "2ALHPv6I0zk": {"res": [1080, 1920], "length": 4577, "fps": 30.0}, "7tsKK7uap18": {"res": [356, 640], "length": 4096, "fps": 29.97002997002997}, "0TFvNc9ysQo": {"res": [1080, 1920], "length": 16917, "fps": 30.0}, "5O0BtJC12gg": {"res": [1080, 1920], "length": 5992, "fps": 23.976023976023978}, "1a__VSfTk4I": {"res": [1080, 1920], "length": 3606, "fps": 25.0}, "-R5AVqsEZYw": {"res": [480, 640], "length": 2269, "fps": 15.0}, "N2YhzBjiYUg": {"res": [720, 1280], "length": 2500, "fps": 25.0}, "5ZeXnuyogvU": {"res": [720, 1280], "length": 10450, "fps": 30.0}, "2X-GamfdBRs": {"res": [720, 1280], "length": 8292, "fps": 29.787}, "3DigAUAvEM0": {"res": [1080, 1920], "length": 12906, "fps": 29.97002997002997}, "4bVux9Ni2_0": {"res": [720, 1280], "length": 3974, "fps": 29.97002997002997}, "7eKKiA5Bpi8": {"res": [1080, 1920], "length": 3413, "fps": 29.97002997002997}, "2Kxcrq-_Jts": {"res": [1080, 1920], "length": 631, 
"fps": 24.0}, "3TlBMtYgTqU": {"res": [1080, 1920], "length": 5430, "fps": 29.97002997002997}, "64lav2iA5-c": {"res": [720, 1280], "length": 2229, "fps": 30.0}, "37O1I0qQC38": {"res": [1080, 1920], "length": 870, "fps": 29.79}, "-MBhf8zY34g": {"res": [1080, 1920], "length": 11651, "fps": 29.97002997002997}, "-Rw6Rq60ZFc": {"res": [360, 640], "length": 703, "fps": 29.97002997002997}, "3gn32tVVl9g": {"res": [720, 1280], "length": 7944, "fps": 30.0}, "-FSlHH2ReLA": {"res": [480, 854], "length": 615, "fps": 24.0}, "-Nh1lNpdusE": {"res": [720, 1080], "length": 2548, "fps": 30.0}, "6dKIcUCzRHc": {"res": [720, 1280], "length": 21286, "fps": 30.0}, "3D-KZP5NFk8": {"res": [720, 1080], "length": 4418, "fps": 29.97002997002997}, "2ybDcnPdFUk": {"res": [1080, 1920], "length": 8052, "fps": 24.0}, "1Tr77GEcSrE": {"res": [1080, 608], "length": 8252, "fps": 29.0}, "6ApZkKOcahA": {"res": [480, 720], "length": 296, "fps": 29.97002997002997}, "0XGfpv6PUq4": {"res": [1080, 1920], "length": 8063, "fps": 30.0}, "3v8sZtJUTFw": {"res": [1080, 1920], "length": 13583, "fps": 29.97002997002997}, "7hxIc0LfbJo": {"res": [1080, 1920], "length": 14128, "fps": 23.976023976023978}, "1U-rv8VSGpE": {"res": [1080, 1920], "length": 14372, "fps": 29.97002997002997}, "4M7XRJ_Ep24": {"res": [1080, 1920], "length": 8293, "fps": 30.0}, "0cK5SNl2P74": {"res": [360, 640], "length": 9022, "fps": 29.97}, "24fz_9v18Us": {"res": [1080, 1920], "length": 13433, "fps": 29.97002997002997}, "0XUkpqq1Xm4": {"res": [720, 1280], "length": 3108, "fps": 29.97002997002997}, "3DWw3IVnq_s": {"res": [1080, 1920], "length": 4334, "fps": 29.97002997002997}, "-9waJ0kweuU": {"res": [720, 1280], "length": 13096, "fps": 29.97002997002997}, "6mKWf2QB9ps": {"res": [480, 654], "length": 3112, "fps": 29.97002997002997}, "5kx2eB3uh2I": {"res": [1080, 1920], "length": 17129, "fps": 29.97002997002997}, "7rUZO71gBS8": {"res": [1080, 1920], "length": 5803, "fps": 23.976023976023978}, "5v7xTW5puEo": {"res": [1080, 1920], "length": 8185, 
"fps": 23.976023976023978}, "3cdQLRuKCe4": {"res": [360, 640], "length": 5494, "fps": 30.0}, "6tp9uIX_cG8": {"res": [720, 1280], "length": 2860, "fps": 30.0}, "-Acy6WxMaeM": {"res": [720, 1280], "length": 8873, "fps": 29.97002997002997}, "4b-evcJm2JQ": {"res": [720, 1280], "length": 22486, "fps": 30.0}, "4KDsWM3PBFo": {"res": [1080, 1920], "length": 15257, "fps": 29.97002997002997}, "1nJ_iACyuR8": {"res": [360, 640], "length": 11317, "fps": 29.97002997002997}, "0ewF2xMibeo": {"res": [720, 1280], "length": 4261, "fps": 29.97002997002997}, "3vWrtaDa4YQ": {"res": [720, 1280], "length": 5838, "fps": 29.97002997002997}, "-LneeGWAGhE": {"res": [1080, 1920], "length": 4203, "fps": 23.976023976023978}, "52OxKKP6jqU": {"res": [720, 1280], "length": 7500, "fps": 29.97002997002997}, "7GMPXyXpy8A": {"res": [720, 1280], "length": 3062, "fps": 30.0}, "2pDVyY9wsRg": {"res": [1080, 1920], "length": 2224, "fps": 23.976023976023978}, "3L3uIePGTAQ": {"res": [356, 640], "length": 895, "fps": 29.97002997002997}, "1oR8oO7Gq34": {"res": [720, 1126], "length": 13582, "fps": 25.0}, "7ZMtxeeGUbU": {"res": [1080, 1920], "length": 9274, "fps": 29.97002997002997}, "-XPhFsnjyFA": {"res": [720, 1280], "length": 1829, "fps": 29.97002997002997}, "52MGfKDRnmA": {"res": [1080, 1920], "length": 10966, "fps": 23.976023976023978}, "1Wol7Y3ezhA": {"res": [720, 1280], "length": 10197, "fps": 29.97002997002997}, "1Ko7j5PcUEc": {"res": [480, 640], "length": 2270, "fps": 30.0}, "5aVtAL5Sdmo": {"res": [720, 1280], "length": 21248, "fps": 30.0}, "5js4DK-NuPw": {"res": [720, 1280], "length": 7373, "fps": 29.97002997002997}, "5efMDB67q1I": {"res": [480, 654], "length": 3999, "fps": 29.97002997002997}, "Ptk_1Dc2iPY": {"res": [640, 1280], "length": 7247, "fps": 23.976023976023978}, "1yTbNONofs8": {"res": [720, 1280], "length": 25338, "fps": 29.97002997002997}, "RUHVp4-9bdc": {"res": [720, 1280], "length": 10136, "fps": 30.0}, "5y5Jkyq1fys": {"res": [1080, 1920], "length": 17716, "fps": 24.0}, "5WfjmHKPYIg": 
{"res": [1080, 1920], "length": 4587, "fps": 29.97002997002997}, "7AzpywexO6A": {"res": [1080, 1920], "length": 700, "fps": 29.97002997002997}, "4k_S5S4-sok": {"res": [720, 1280], "length": 2773, "fps": 30.0}, "11QQwlNKUr4": {"res": [1080, 1920], "length": 21119, "fps": 30.0}, "6uUb72v37QU": {"res": [720, 1280], "length": 5961, "fps": 25.0}, "1ZRmzT-N_Ms": {"res": [480, 640], "length": 2602, "fps": 30.0}, "6ZWvfLQJ4Iw": {"res": [1080, 1920], "length": 15684, "fps": 23.976023976023978}, "2Ut9YbjmJZA": {"res": [720, 1080], "length": 1125, "fps": 30.0}, "1LWobzBO0E0": {"res": [1080, 1920], "length": 6589, "fps": 29.97002997002997}, "40jK2C5Ovfo": {"res": [360, 640], "length": 1441, "fps": 30.0}, "1BeEdmnlHPA": {"res": [720, 1280], "length": 3301, "fps": 30.0}, "29KwekCeoCw": {"res": [720, 1280], "length": 3561, "fps": 29.97002997002997}, "2Hy6uXW2TjY": {"res": [1080, 1920], "length": 4516, "fps": 29.97002997002997}, "1unnQa95RPA": {"res": [1080, 1920], "length": 1136, "fps": 29.97002997002997}, "43xBjlCZgyk": {"res": [480, 640], "length": 291, "fps": 29.97002997002997}, "2QdQqAAn1zA": {"res": [720, 1080], "length": 2080, "fps": 15.0}, "1SL6lR0R8Ws": {"res": [1080, 1920], "length": 15723, "fps": 29.97002997002997}, "qK2LElSqd_0": {"res": [720, 1280], "length": 1870, "fps": 29.97002997002997}}
\ No newline at end of file
diff --git a/WiLoR/wilor/configs/__init__.py b/WiLoR/wilor/configs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1affbe0bd104d4e24d8cdd9f55c61f72106969e
--- /dev/null
+++ b/WiLoR/wilor/configs/__init__.py
@@ -0,0 +1,114 @@
+import os
+from typing import Dict
+from yacs.config import CfgNode as CN
+
+CACHE_DIR_PRETRAINED = "./pretrained_models/"
+
def to_lower(x: Dict) -> Dict:
    """
    Return a copy of *x* with every key lowercased.
    Args:
        x (dict): Input dictionary
    Returns:
        dict: Output dictionary with all keys converted to lowercase
    """
    lowered = {}
    for key, value in x.items():
        lowered[key.lower()] = value
    return lowered
+
# Root of the default config tree; new_allowed=True lets experiment YAML
# files introduce keys that are not declared below.
_C = CN(new_allowed=True)

# --- General run / bookkeeping options --------------------------------------
_C.GENERAL = CN(new_allowed=True)
_C.GENERAL.RESUME = True
_C.GENERAL.TIME_TO_RUN = 3300
_C.GENERAL.VAL_STEPS = 100
_C.GENERAL.LOG_STEPS = 100
_C.GENERAL.CHECKPOINT_STEPS = 20000
_C.GENERAL.CHECKPOINT_DIR = "checkpoints"
_C.GENERAL.SUMMARY_DIR = "tensorboard"
_C.GENERAL.NUM_GPUS = 1
_C.GENERAL.NUM_WORKERS = 4
_C.GENERAL.MIXED_PRECISION = True
_C.GENERAL.ALLOW_CUDA = True
_C.GENERAL.PIN_MEMORY = False
_C.GENERAL.DISTRIBUTED = False
_C.GENERAL.LOCAL_RANK = 0
_C.GENERAL.USE_SYNCBN = False
_C.GENERAL.WORLD_SIZE = 1

# --- Training loop defaults -------------------------------------------------
_C.TRAIN = CN(new_allowed=True)
_C.TRAIN.NUM_EPOCHS = 100
_C.TRAIN.BATCH_SIZE = 32
_C.TRAIN.SHUFFLE = True
_C.TRAIN.WARMUP = False
_C.TRAIN.NORMALIZE_PER_IMAGE = False
_C.TRAIN.CLIP_GRAD = False
_C.TRAIN.CLIP_GRAD_VALUE = 1.0
# Loss weights are supplied entirely by the experiment config.
_C.LOSS_WEIGHTS = CN(new_allowed=True)

# Dataset selection; populated by the experiment config.
_C.DATASETS = CN(new_allowed=True)

_C.MODEL = CN(new_allowed=True)
_C.MODEL.IMAGE_SIZE = 224

_C.EXTRA = CN(new_allowed=True)
# NOTE(review): presumably the camera focal length in pixels — confirm
# against the model's camera code.
_C.EXTRA.FOCAL_LENGTH = 5000

# --- Data augmentation defaults (consumed by datasets/utils.do_augmentation) -
_C.DATASETS.CONFIG = CN(new_allowed=True)
_C.DATASETS.CONFIG.SCALE_FACTOR = 0.3
_C.DATASETS.CONFIG.ROT_FACTOR = 30
_C.DATASETS.CONFIG.TRANS_FACTOR = 0.02
_C.DATASETS.CONFIG.COLOR_SCALE = 0.2
_C.DATASETS.CONFIG.ROT_AUG_RATE = 0.6
_C.DATASETS.CONFIG.TRANS_AUG_RATE = 0.5
_C.DATASETS.CONFIG.DO_FLIP = False
_C.DATASETS.CONFIG.FLIP_AUG_RATE = 0.5
_C.DATASETS.CONFIG.EXTREME_CROP_AUG_RATE = 0.10
+
def default_config() -> CN:
    """
    Get a yacs CfgNode object with the default config values.
    """
    # Hand out a clone so callers can mutate their copy without touching
    # the module-level defaults.
    cfg = _C.clone()
    return cfg
+
def dataset_config(name='datasets_tar.yaml') -> CN:
    """
    Load a dataset config YAML that lives next to this module.
    Args:
        name (str): File name of the dataset config to load.
    Returns:
        CfgNode: Frozen dataset config as a yacs CfgNode object.
    """
    config_dir = os.path.dirname(os.path.realpath(__file__))
    cfg = CN(new_allowed=True)
    cfg.merge_from_file(os.path.join(config_dir, name))
    cfg.freeze()
    return cfg
+
def dataset_eval_config() -> CN:
    """Get the evaluation-dataset config (loads datasets_eval.yaml)."""
    return dataset_config('datasets_eval.yaml')
+
def get_config(config_file: str, merge: bool = True, update_cachedir: bool = False) -> CN:
    """
    Read a config file and optionally merge it with the default config file.
    Args:
        config_file (str): Path to config file.
        merge (bool): Whether to merge with the default config or not.
        update_cachedir (bool): If True, resolve relative MANO paths against
            the pretrained-models cache directory.
    Returns:
        CfgNode: Frozen config as a yacs CfgNode object.
    """
    cfg = default_config() if merge else CN(new_allowed=True)
    cfg.merge_from_file(config_file)

    if update_cachedir:
        def absolutize(path: str) -> str:
            # Absolute paths are kept; relative ones are anchored at the cache dir.
            return path if os.path.isabs(path) else os.path.join(CACHE_DIR_PRETRAINED, path)

        cfg.MANO.MODEL_PATH = absolutize(cfg.MANO.MODEL_PATH)
        cfg.MANO.MEAN_PARAMS = absolutize(cfg.MANO.MEAN_PARAMS)

    cfg.freeze()
    return cfg
\ No newline at end of file
diff --git a/WiLoR/wilor/configs/__pycache__/__init__.cpython-311.pyc b/WiLoR/wilor/configs/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..02742872615eee6d0c8303a9f1ee533ea143c9b7
Binary files /dev/null and b/WiLoR/wilor/configs/__pycache__/__init__.cpython-311.pyc differ
diff --git a/WiLoR/wilor/datasets/utils.py b/WiLoR/wilor/datasets/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b09134e78c991d86c3cab1cfe40ba840741a19c7
--- /dev/null
+++ b/WiLoR/wilor/datasets/utils.py
@@ -0,0 +1,994 @@
+"""
+Parts of the code are taken or adapted from
+https://github.com/mkocabas/EpipolarPose/blob/master/lib/utils/img_utils.py
+"""
+import torch
+import numpy as np
+from skimage.transform import rotate, resize
+from skimage.filters import gaussian
+import random
+import cv2
+from typing import List, Dict, Tuple
+from yacs.config import CfgNode
+
def expand_to_aspect_ratio(input_shape, target_aspect_ratio=None):
    """
    Increase the size of a bounding box so it matches a target aspect ratio.

    Exactly one side is grown (never shrunk) so the resulting box has the
    w/h ratio of ``target_aspect_ratio``.

    Args:
        input_shape: Sequence (w, h) with the current box size.
        target_aspect_ratio: Sequence (w_t, h_t) giving the desired ratio,
            or None to leave the box untouched.
    Returns:
        np.array of shape (2,) with the expanded (w, h); the original
        ``input_shape`` is returned unchanged when it is None-targeted or
        cannot be unpacked as (w, h).
    """
    if target_aspect_ratio is None:
        return input_shape

    try:
        w, h = input_shape
    except (ValueError, TypeError):
        # Not a 2-element size; hand it back untouched (legacy behavior).
        return input_shape

    w_t, h_t = target_aspect_ratio
    if h / w < h_t / w_t:
        # Box is too wide for the target ratio: grow the height.
        h_new = max(w * h_t / w_t, h)
        w_new = w
    else:
        # Box is too tall (or already exact): grow the width.
        h_new = h
        w_new = max(h * w_t / h_t, w)
    # max() above guarantees the box never shrinks, so the old debugger trap
    # (breakpoint() when h_new < h or w_new < w) was dead code; removed.
    return np.array([w_new, h_new])
+
def do_augmentation(aug_config: CfgNode) -> Tuple:
    """
    Compute random augmentation parameters.
    Args:
        aug_config (CfgNode): Config containing augmentation parameters.
    Returns:
        scale (float): Box rescaling factor.
        rot (float): Random image rotation.
        do_flip (bool): Whether to flip image or not.
        do_extreme_crop (bool): Whether to apply extreme cropping (as proposed in EFT).
        extreme_crop_lvl (int): Extreme-cropping level (EXTREME_CROP_AUG_LEVEL key, default 0).
        color_scale (List): Color rescaling factor
        tx (float): Random translation along the x axis.
        ty (float): Random translation along the y axis.
    """

    # Translations and scale: standard normals clipped to [-1, 1] (so [-2, 2]
    # for rotation below), then scaled by the configured factors.
    tx = np.clip(np.random.randn(), -1.0, 1.0) * aug_config.TRANS_FACTOR
    ty = np.clip(np.random.randn(), -1.0, 1.0) * aug_config.TRANS_FACTOR
    scale = np.clip(np.random.randn(), -1.0, 1.0) * aug_config.SCALE_FACTOR + 1.0
    # Rotation only happens with probability ROT_AUG_RATE. NOTE(review): the
    # conditional expression evaluates random.random() before np.random.randn(),
    # so the RNG draw order matters for reproducing seeded runs.
    rot = np.clip(np.random.randn(), -2.0,
                  2.0) * aug_config.ROT_FACTOR if random.random() <= aug_config.ROT_AUG_RATE else 0
    # Short-circuit: the flip coin is only drawn when DO_FLIP is enabled.
    do_flip = aug_config.DO_FLIP and random.random() <= aug_config.FLIP_AUG_RATE
    do_extreme_crop = random.random() <= aug_config.EXTREME_CROP_AUG_RATE
    extreme_crop_lvl = aug_config.get('EXTREME_CROP_AUG_LEVEL', 0)
    # Per-channel color jitter, uniform in [1 - COLOR_SCALE, 1 + COLOR_SCALE].
    c_up = 1.0 + aug_config.COLOR_SCALE
    c_low = 1.0 - aug_config.COLOR_SCALE
    color_scale = [random.uniform(c_low, c_up), random.uniform(c_low, c_up), random.uniform(c_low, c_up)]
    return scale, rot, do_flip, do_extreme_crop, extreme_crop_lvl, color_scale, tx, ty
+
def rotate_2d(pt_2d: np.array, rot_rad: float) -> np.array:
    """
    Rotate a 2D point about the origin of the x-y plane.
    Args:
        pt_2d (np.array): Input 2D point with shape (2,).
        rot_rad (float): Rotation angle in radians.
    Returns:
        np.array: Rotated 2D point (float32).
    """
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    x, y = pt_2d[0], pt_2d[1]
    return np.array([x * cs - y * sn, x * sn + y * cs], dtype=np.float32)
+
+
def gen_trans_from_patch_cv(c_x: float, c_y: float,
                            src_width: float, src_height: float,
                            dst_width: float, dst_height: float,
                            scale: float, rot: float) -> np.array:
    """
    Create the affine transformation matrix for the bounding box crop.
    Args:
        c_x (float): Bounding box center x coordinate in the original image.
        c_y (float): Bounding box center y coordinate in the original image.
        src_width (float): Bounding box width.
        src_height (float): Bounding box height.
        dst_width (float): Output box width.
        dst_height (float): Output box height.
        scale (float): Rescaling factor for the bounding box (augmentation).
        rot (float): Random rotation applied to the box.
    Returns:
        trans (np.array): 2x3 affine matrix suitable for cv2.warpAffine.
    """
    # Scale the source box, then rotate its down/right half-extent vectors.
    rot_rad = np.pi * rot / 180
    half_w = src_width * scale * 0.5
    half_h = src_height * scale * 0.5
    src_center = np.array([c_x, c_y], dtype=np.float64)
    src_down = rotate_2d(np.array([0, half_h], dtype=np.float32), rot_rad)
    src_right = rotate_2d(np.array([half_w, 0], dtype=np.float32), rot_rad)

    dst_center = np.array([dst_width * 0.5, dst_height * 0.5], dtype=np.float32)
    dst_down = np.array([0, dst_height * 0.5], dtype=np.float32)
    dst_right = np.array([dst_width * 0.5, 0], dtype=np.float32)

    # Three point correspondences (center, center + down, center + right)
    # fully determine the affine map.
    src = np.stack([src_center,
                    src_center + src_down,
                    src_center + src_right]).astype(np.float32)
    dst = np.stack([dst_center,
                    dst_center + dst_down,
                    dst_center + dst_right]).astype(np.float32)

    return cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+
def trans_point2d(pt_2d: np.array, trans: np.array):
    """
    Apply a 2x3 affine transform to a single 2D point.
    Args:
        pt_2d (np.array): Input 2D point with shape (2,).
        trans (np.array): 2x3 affine transformation matrix.
    Returns:
        np.array: Transformed 2D point with shape (2,).
    """
    homogeneous = np.array([pt_2d[0], pt_2d[1], 1.])
    return np.dot(trans, homogeneous)[0:2]
+
def get_transform(center, scale, res, rot=0):
    """Generate the 3x3 matrix mapping original-image coordinates into a
    `res`-sized crop centered at `center` with box size 200 * scale.
    Taken from PARE: https://github.com/mkocabas/PARE/blob/6e0caca86c6ab49ff80014b661350958e5b72fd8/pare/utils/image_utils.py
    """
    h = 200 * scale
    t = np.array([
        [float(res[1]) / h, 0., res[1] * (-float(center[0]) / h + .5)],
        [0., float(res[0]) / h, res[0] * (-float(center[1]) / h + .5)],
        [0., 0., 1.],
    ])
    if rot != 0:
        # Rotate about the crop center: translate to center, rotate,
        # translate back. Sign flipped to match the cropping direction.
        rot_rad = -rot * np.pi / 180
        sn, cs = np.sin(rot_rad), np.cos(rot_rad)
        rot_mat = np.array([
            [cs, -sn, 0.],
            [sn, cs, 0.],
            [0., 0., 1.],
        ])
        t_mat = np.eye(3)
        t_mat[0, 2] = -res[1] / 2
        t_mat[1, 2] = -res[0] / 2
        t_inv = t_mat.copy()
        t_inv[:2, 2] *= -1
        t = t_inv @ rot_mat @ t_mat @ t
    return t
+
+
def transform(pt, center, scale, res, invert=0, rot=0, as_int=True):
    """Map a pixel location between the original image and the crop frame.
    Taken from PARE: https://github.com/mkocabas/PARE/blob/6e0caca86c6ab49ff80014b661350958e5b72fd8/pare/utils/image_utils.py
    """
    t = get_transform(center, scale, res, rot=rot)
    if invert:
        t = np.linalg.inv(t)
    # Homogeneous coordinates; the -1/+1 shifts appear to convert between
    # 1-based and 0-based pixel indexing (inherited from PARE).
    mapped = t.dot(np.array([pt[0] - 1, pt[1] - 1, 1.]))
    if as_int:
        mapped = mapped.astype(int)
    return mapped[:2] + 1
+
def crop_img(img, ul, br, border_mode=cv2.BORDER_CONSTANT, border_value=0):
    """Crop `img` to the axis-aligned box [ul, br] via warpAffine; pixels
    outside the image are filled according to `border_mode`."""
    out_w = br[0] - ul[0]
    out_h = br[1] - ul[1]
    cx = (ul[0] + br[0]) / 2
    cy = (ul[1] + br[1]) / 2
    trans = gen_trans_from_patch_cv(cx, cy, out_w, out_h, out_w, out_h, 1.0, 0)
    patch = cv2.warpAffine(img, trans, (int(out_w), int(out_h)),
                           flags=cv2.INTER_LINEAR,
                           borderMode=border_mode,
                           borderValue=border_value
                           )

    # Force a constant border for the alpha channel of RGBA input so a
    # replicating border mode cannot extend opacity past the image edge.
    if (img.shape[2] == 4) and (border_mode != cv2.BORDER_CONSTANT):
        patch[:, :, 3] = cv2.warpAffine(img[:, :, 3], trans, (int(out_w), int(out_h)),
                                        flags=cv2.INTER_LINEAR,
                                        borderMode=cv2.BORDER_CONSTANT,
                                        )

    return patch
+
def generate_image_patch_skimage(img: np.array, c_x: float, c_y: float,
                                 bb_width: float, bb_height: float,
                                 patch_width: float, patch_height: float,
                                 do_flip: bool, scale: float, rot: float,
                                 border_mode=cv2.BORDER_CONSTANT, border_value=0) -> Tuple[np.array, np.array]:
    """
    Crop image according to the supplied bounding box (skimage backend).
    Args:
        img (np.array): Input image of shape (H, W, 3)
        c_x (float): Bounding box center x coordinate in the original image.
        c_y (float): Bounding box center y coordinate in the original image.
        bb_width (float): Bounding box width.
        bb_height (float): Bounding box height.
        patch_width (float): Output box width.
        patch_height (float): Output box height.
        do_flip (bool): Whether to flip image or not.
        scale (float): Rescaling factor for the bounding box (augmentation).
        rot (float): Random rotation applied to the box.
    Returns:
        img_patch (np.array): Cropped image patch of shape (patch_height, patch_width, 3)
        trans (np.array): Transformation matrix.
    Raises:
        ValueError: If the requested crop degenerates to an empty image.
    """

    img_height, img_width, img_channels = img.shape
    if do_flip:
        # Mirror horizontally and move the box center accordingly.
        img = img[:, ::-1, :]
        c_x = img_width - c_x - 1

    trans = gen_trans_from_patch_cv(c_x, c_y, bb_width, bb_height, patch_width, patch_height, scale, rot)

    center = np.array([c_x, c_y], dtype=np.float64)
    res = np.array([patch_width, patch_height], dtype=np.float64)
    # The transform() helper assumes square boxes and square patches.
    assert bb_width == bb_height, f'{bb_width=} != {bb_height=}'
    assert patch_width == patch_height, f'{patch_width=} != {patch_height=}'
    scale1 = scale * bb_width / 200.

    # Upper-left / bottom-right crop corners in original-image coordinates.
    ul = np.array(transform([1, 1], center, scale1, res, invert=1, as_int=False)) - 1
    br = np.array(transform([res[0] + 1,
                             res[1] + 1], center, scale1, res, invert=1, as_int=False)) - 1

    # Padding so that when rotated the proper amount of context is included.
    # (The old bare `except: breakpoint()` around this line is gone: any
    # failure here should propagate, not drop into a debugger.)
    pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2) + 1
    if rot != 0:
        ul -= pad
        br += pad

    new_img = crop_img(img, ul, br, border_mode=border_mode, border_value=border_value).astype(np.float32)

    if rot != 0:
        # Rotate, then strip the padding added above.
        new_img = rotate(new_img, rot)
        new_img = new_img[pad:-pad, pad:-pad]

    if new_img.shape[0] < 1 or new_img.shape[1] < 1:
        # Previously this printed diagnostics and called breakpoint();
        # fail loudly instead so batch jobs never hang on a debugger prompt.
        raise ValueError(
            f'Degenerate crop: img.shape={img.shape}, new_img.shape={new_img.shape}, '
            f'ul={ul}, br={br}, pad={pad}, rot={rot}')

    # Resize to the requested patch resolution and return to uint8 range.
    new_img = resize(new_img, res)
    new_img = np.clip(new_img, 0, 255).astype(np.uint8)

    return new_img, trans
+
+
def generate_image_patch_cv2(img: np.array, c_x: float, c_y: float,
                             bb_width: float, bb_height: float,
                             patch_width: float, patch_height: float,
                             do_flip: bool, scale: float, rot: float,
                             border_mode=cv2.BORDER_CONSTANT, border_value=0) -> Tuple[np.array, np.array]:
    """
    Crop the input image with cv2.warpAffine and return the crop together
    with the transformation matrix that produced it.
    Args:
        img (np.array): Input image of shape (H, W, 3)
        c_x (float): Bounding box center x coordinate in the original image.
        c_y (float): Bounding box center y coordinate in the original image.
        bb_width (float): Bounding box width.
        bb_height (float): Bounding box height.
        patch_width (float): Output box width.
        patch_height (float): Output box height.
        do_flip (bool): Whether to flip image or not.
        scale (float): Rescaling factor for the bounding box (augmentation).
        rot (float): Random rotation applied to the box.
    Returns:
        img_patch (np.array): Cropped image patch of shape (patch_height, patch_width, C)
        trans (np.array): Transformation matrix.
    """

    img_height, img_width, img_channels = img.shape
    if do_flip:
        # Mirror horizontally and move the box center accordingly.
        img = img[:, ::-1, :]
        c_x = img_width - c_x - 1

    trans = gen_trans_from_patch_cv(c_x, c_y, bb_width, bb_height, patch_width, patch_height, scale, rot)

    patch_size = (int(patch_width), int(patch_height))
    img_patch = cv2.warpAffine(img, trans, patch_size,
                               flags=cv2.INTER_LINEAR,
                               borderMode=border_mode,
                               borderValue=border_value,
                               )
    # Force a constant border for the alpha channel of RGBA input so a
    # replicating border mode cannot extend opacity past the image edge.
    if (img.shape[2] == 4) and (border_mode != cv2.BORDER_CONSTANT):
        img_patch[:, :, 3] = cv2.warpAffine(img[:, :, 3], trans, patch_size,
                                            flags=cv2.INTER_LINEAR,
                                            borderMode=cv2.BORDER_CONSTANT,
                                            )

    return img_patch, trans
+
+
def convert_cvimg_to_tensor(cvimg: np.array):
    """
    Convert image from HWC (OpenCV layout) to CHW format as float32.
    Args:
        cvimg (np.array): Image of shape (H, W, 3) as loaded by OpenCV.
    Returns:
        np.array: Output image of shape (3, H, W), dtype float32.
    """
    chw = np.transpose(cvimg, (2, 0, 1))
    # astype copies, so the caller's array is never aliased or mutated.
    return chw.astype(np.float32)
+
def fliplr_params(mano_params: Dict, has_mano_params: Dict) -> Tuple[Dict, Dict]:
    """
    Flip MANO parameters when flipping the image.
    Args:
        mano_params (Dict): MANO parameter annotations.
        has_mano_params (Dict): Whether MANO annotations are valid.
    Returns:
        Dict, Dict: Flipped MANO parameters and valid flags.
    """
    def _mirror(axis_angles):
        # Negate the y/z components of every axis-angle triplet; this is the
        # axis-angle form of a left/right mirror.
        flipped = axis_angles.copy()
        flipped[1::3] *= -1
        flipped[2::3] *= -1
        return flipped.astype(np.float32)

    mano_params = {'global_orient': _mirror(mano_params['global_orient']),
                   'hand_pose': _mirror(mano_params['hand_pose']),
                   # Shape coefficients are mirror-invariant; copy unchanged.
                   'betas': mano_params['betas'].copy().astype(np.float32)
                   }

    has_mano_params = {key: has_mano_params[key].copy()
                       for key in ('global_orient', 'hand_pose', 'betas')}

    return mano_params, has_mano_params
+
+
def fliplr_keypoints(joints: np.array, width: float, flip_permutation: List[int]) -> np.array:
    """
    Flip 2D or 3D keypoints horizontally.
    Args:
        joints (np.array): Array of shape (N, 3) or (N, 4) containing 2D or 3D keypoint locations and confidence.
        width (float): Image width used to mirror the x coordinate.
        flip_permutation (List): Permutation to apply after flipping.
    Returns:
        np.array: Flipped keypoints with the same shape as the input.
    """
    mirrored = joints.copy()
    mirrored[:, 0] = width - mirrored[:, 0] - 1
    # Reorder rows so left/right joint semantics match the mirrored image.
    return mirrored[flip_permutation, :]
+
def keypoint_3d_processing(keypoints_3d: np.array, flip_permutation: List[int], rot: float, do_flip: float) -> np.array:
    """
    Process 3D keypoints (rotation/flipping).
    Args:
        keypoints_3d (np.array): Input array of shape (N, 4) containing the 3D keypoints and confidence.
        flip_permutation (List): Permutation to apply after flipping.
        rot (float): Random rotation applied to the keypoints.
        do_flip (bool): Whether to flip keypoints or not.
    Returns:
        np.array: Transformed 3D keypoints with shape (N, 4), float32.
    """
    if do_flip:
        keypoints_3d = fliplr_keypoints(keypoints_3d, 1, flip_permutation)
    if rot != 0:
        # In-plane rotation about the z axis; the angle is negated to match
        # the image-rotation convention used elsewhere in this file.
        rot_rad = -rot * np.pi / 180
        sn, cs = np.sin(rot_rad), np.cos(rot_rad)
        rot_mat = np.eye(3)
        rot_mat[0, :2] = [cs, -sn]
        rot_mat[1, :2] = [sn, cs]
        # Rotate the xyz columns in place; confidence (last column) untouched.
        keypoints_3d[:, :-1] = np.einsum('ij,kj->ki', rot_mat, keypoints_3d[:, :-1])
    return keypoints_3d.astype('float32')
+
def rot_aa(aa: np.array, rot: float) -> np.array:
    """
    Rotate axis-angle parameters by an in-plane rotation.
    Args:
        aa (np.array): Axis-angle vector of shape (3,).
        rot (float): Rotation angle in degrees.
    Returns:
        np.array: Rotated axis-angle vector (float32).
    """
    theta = np.deg2rad(-rot)
    # In-plane (z-axis) rotation in the camera frame.
    R = np.array([[np.cos(theta), -np.sin(theta), 0],
                  [np.sin(theta), np.cos(theta), 0],
                  [0, 0, 1]])
    # axis-angle -> rotation matrix, compose with R, then back to axis-angle.
    per_rdg, _ = cv2.Rodrigues(aa)
    resrot, _ = cv2.Rodrigues(np.dot(R, per_rdg))
    return (resrot.T)[0].astype(np.float32)
+
def mano_param_processing(mano_params: Dict, has_mano_params: Dict, rot: float, do_flip: bool) -> Tuple[Dict, Dict]:
    """
    Apply random augmentations to the MANO parameters.
    Args:
        mano_params (Dict): MANO parameter annotations.
        has_mano_params (Dict): Whether mano annotations are valid.
        rot (float): Random rotation applied to the keypoints.
        do_flip (bool): Whether to flip keypoints or not.
    Returns:
        Dict, Dict: Transformed MANO parameters and valid flags.
    """
    params, flags = mano_params, has_mano_params
    if do_flip:
        params, flags = fliplr_params(params, flags)
    # The in-plane image rotation must also rotate the global orientation.
    params['global_orient'] = rot_aa(params['global_orient'], rot)
    return params, flags
+
+
+
def get_example(img_path: str|np.ndarray, center_x: float, center_y: float,
                width: float, height: float,
                keypoints_2d: np.array, keypoints_3d: np.array,
                mano_params: Dict, has_mano_params: Dict,
                flip_kp_permutation: List[int],
                patch_width: int, patch_height: int,
                mean: np.array, std: np.array,
                do_augment: bool, is_right: bool, augm_config: CfgNode,
                is_bgr: bool = True,
                use_skimage_antialias: bool = False,
                border_mode: int = cv2.BORDER_CONSTANT,
                return_trans: bool = False) -> Tuple:
    """
    Get an example from the dataset and (possibly) apply random augmentations.
    Args:
        img_path (str|np.ndarray): Image filename, or an already-decoded image array.
        center_x (float): Bounding box center x coordinate in the original image.
        center_y (float): Bounding box center y coordinate in the original image.
        width (float): Bounding box width.
        height (float): Bounding box height.
        keypoints_2d (np.array): Array with shape (N,3) containing the 2D keypoints in the original image coordinates.
        keypoints_3d (np.array): Array with shape (N,4) containing the 3D keypoints.
        mano_params (Dict): MANO parameter annotations.
        has_mano_params (Dict): Whether MANO annotations are valid.
        flip_kp_permutation (List): Permutation to apply to the keypoints after flipping.
        patch_width (float): Output box width.
        patch_height (float): Output box height.
        mean (np.array): Array of shape (3,) containing the mean for normalizing the input image.
        std (np.array): Array of shape (3,) containing the std for normalizing the input image.
        do_augment (bool): Whether to apply data augmentation or not.
        is_right (bool): Whether this is a right hand; left hands are always flipped.
        augm_config (CfgNode): Config containing augmentation parameters.
        is_bgr (bool): If True the input channels are BGR and are reversed to RGB.
        use_skimage_antialias (bool): Pre-blur before strong downscaling to reduce aliasing.
        border_mode (int): cv2 border mode used when the crop leaves the image.
        return_trans (bool): If True, additionally return the 2x3 crop transform.
    Returns:
        return img_patch, keypoints_2d, keypoints_3d, mano_params, has_mano_params, img_size
        img_patch (np.array): Cropped image patch of shape (3, patch_height, patch_width)
        keypoints_2d (np.array): Array with shape (N,3) containing the transformed 2D keypoints.
        keypoints_3d (np.array): Array with shape (N,4) containing the transformed 3D keypoints.
        mano_params (Dict): Transformed MANO parameters.
        has_mano_params (Dict): Valid flag for transformed MANO parameters.
        img_size (np.array): Image size of the original image, as (height, width).
        trans (np.array): Only when return_trans=True, the 2x3 crop transform.
    """
    if isinstance(img_path, str):
        # 1. load image
        cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if not isinstance(cvimg, np.ndarray):
            raise IOError("Fail to read %s" % img_path)
    elif isinstance(img_path, np.ndarray):
        # Caller supplied an already-decoded image array.
        cvimg = img_path
    else:
        raise TypeError('img_path must be either a string or a numpy array')
    img_height, img_width, img_channels = cvimg.shape

    # Note: stored as (height, width).
    img_size = np.array([img_height, img_width])

    # 2. get augmentation params
    if do_augment:
        scale, rot, do_flip, do_extreme_crop, extreme_crop_lvl, color_scale, tx, ty = do_augmentation(augm_config)
    else:
        # Identity augmentation.
        scale, rot, do_flip, do_extreme_crop, extreme_crop_lvl, color_scale, tx, ty = 1.0, 0, False, False, 0, [1.0, 1.0, 1.0], 0., 0.

    # if it's a left hand, we flip
    if not is_right:
        do_flip = True

    if width < 1 or height < 1:
        # NOTE(review): debugger trap left in for degenerate boxes — consider
        # replacing with a raised exception.
        breakpoint()

    if do_extreme_crop:
        # NOTE(review): extreme_cropping / extreme_cropping_aggressive are
        # defined elsewhere in this module (not visible here).
        if extreme_crop_lvl == 0:
            center_x1, center_y1, width1, height1 = extreme_cropping(center_x, center_y, width, height, keypoints_2d)
        elif extreme_crop_lvl == 1:
            center_x1, center_y1, width1, height1 = extreme_cropping_aggressive(center_x, center_y, width, height, keypoints_2d)

        # Discard extreme crops that collapse below THRESH pixels per side.
        THRESH = 4
        if width1 < THRESH or height1 < THRESH:
            # print(f'{do_extreme_crop=}')
            # print(f'width: {width}, height: {height}')
            # print(f'width1: {width1}, height1: {height1}')
            # print(f'center_x: {center_x}, center_y: {center_y}')
            # print(f'center_x1: {center_x1}, center_y1: {center_y1}')
            # print(f'keypoints_2d: {keypoints_2d}')
            # print(f'\n\n', flush=True)
            # breakpoint()
            pass
            # print(f'skip ==> width1: {width1}, height1: {height1}, width: {width}, height: {height}')
        else:
            center_x, center_y, width, height = center_x1, center_y1, width1, height1

    # Random translation, proportional to the box size.
    center_x += width * tx
    center_y += height * ty

    # Process 3D keypoints
    keypoints_3d = keypoint_3d_processing(keypoints_3d, flip_kp_permutation, rot, do_flip)

    # 3. generate image patch
    if use_skimage_antialias:
        # Blur image to avoid aliasing artifacts
        downsampling_factor = (patch_width / (width*scale))
        if downsampling_factor > 1.1:
            cvimg = gaussian(cvimg, sigma=(downsampling_factor-1)/2, channel_axis=2, preserve_range=True, truncate=3.0)

    img_patch_cv, trans = generate_image_patch_cv2(cvimg,
                                                   center_x, center_y,
                                                   width, height,
                                                   patch_width, patch_height,
                                                   do_flip, scale, rot,
                                                   border_mode=border_mode)

    # img_patch_cv, trans = generate_image_patch_skimage(cvimg,
    #                                                    center_x, center_y,
    #                                                    width, height,
    #                                                    patch_width, patch_height,
    #                                                    do_flip, scale, rot,
    #                                                    border_mode=border_mode)

    image = img_patch_cv.copy()
    if is_bgr:
        # BGR -> RGB channel reversal.
        image = image[:, :, ::-1]
    img_patch_cv = image.copy()
    # HWC -> CHW float32.
    img_patch = convert_cvimg_to_tensor(image)


    mano_params, has_mano_params = mano_param_processing(mano_params, has_mano_params, rot, do_flip)

    # apply normalization: per-channel color jitter, then (x - mean) / std.
    for n_c in range(min(img_channels, 3)):
        img_patch[n_c, :, :] = np.clip(img_patch[n_c, :, :] * color_scale[n_c], 0, 255)
        if mean is not None and std is not None:
            img_patch[n_c, :, :] = (img_patch[n_c, :, :] - mean[n_c]) / std[n_c]
    if do_flip:
        keypoints_2d = fliplr_keypoints(keypoints_2d, img_width, flip_kp_permutation)


    # Map 2D keypoints into the crop frame, then normalize to [-0.5, 0.5].
    for n_jt in range(len(keypoints_2d)):
        keypoints_2d[n_jt, 0:2] = trans_point2d(keypoints_2d[n_jt, 0:2], trans)
    keypoints_2d[:, :-1] = keypoints_2d[:, :-1] / patch_width - 0.5

    if not return_trans:
        return img_patch, keypoints_2d, keypoints_3d, mano_params, has_mano_params, img_size
    else:
        return img_patch, keypoints_2d, keypoints_3d, mano_params, has_mano_params, img_size, trans
+
def crop_to_hips(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array) -> Tuple:
    """
    Extreme cropping: Crop the box up to the hip locations.
    Args:
        center_x (float): x coordinate of the bounding box center.
        center_y (float): y coordinate of the bounding box center.
        width (float): Bounding box width.
        height (float): Bounding box height.
        keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
    Returns:
        center_x (float): x coordinate of the new bounding box center.
        center_y (float): y coordinate of the new bounding box center.
        width (float): New bounding box width.
        height (float): New bounding box height.
    """
    keypoints_2d = keypoints_2d.copy()
    # Zero out lower-body keypoints so the refit box only covers the upper
    # body (indices written as 25+i address the second keypoint block).
    lower_body_keypoints = [10, 11, 13, 14, 19, 20, 21, 22, 23, 24, 25 + 0, 25 + 1, 25 + 4, 25 + 5]
    keypoints_2d[lower_body_keypoints, :] = 0
    # Recompute the box only when the total remaining confidence exceeds 1.
    if keypoints_2d[:, -1].sum() > 1:
        center, scale = get_bbox(keypoints_2d)
        center_x, center_y = center[0], center[1]
        width, height = 1.1 * scale[0], 1.1 * scale[1]
    return center_x, center_y, width, height
+
+
+def crop_to_shoulders(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+ Extreme cropping: Crop the box up to the shoulder locations.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ lower_body_keypoints = [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16]]
+ keypoints_2d[lower_body_keypoints, :] = 0
+ center, scale = get_bbox(keypoints_2d)
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.2 * scale[0]
+ height = 1.2 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_to_head(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+ Extreme cropping: Crop the box and keep on only the head.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ lower_body_keypoints = [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16]]
+ keypoints_2d[lower_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.3 * scale[0]
+ height = 1.3 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_torso_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+ Extreme cropping: Crop the box and keep on only the torso.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nontorso_body_keypoints = [0, 3, 4, 6, 7, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 4, 5, 6, 7, 10, 11, 13, 17, 18]]
+ keypoints_2d[nontorso_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_rightarm_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+ Extreme cropping: Crop the box and keep on only the right arm.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonrightarm_body_keypoints = [0, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]]
+ keypoints_2d[nonrightarm_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_leftarm_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+ Extreme cropping: Crop the box and keep on only the left arm.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonleftarm_body_keypoints = [0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + [25 + i for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 18]]
+ keypoints_2d[nonleftarm_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_legs_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+ Extreme cropping: Crop the box and keep on only the legs.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonlegs_body_keypoints = [0, 1, 2, 3, 4, 5, 6, 7, 15, 16, 17, 18] + [25 + i for i in [6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18]]
+ keypoints_2d[nonlegs_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_rightleg_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+ Extreme cropping: Crop the box and keep on only the right leg.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonrightleg_body_keypoints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + [25 + i for i in [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]]
+ keypoints_2d[nonrightleg_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def crop_leftleg_only(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array):
+ """
+ Extreme cropping: Crop the box and keep on only the left leg.
+ Args:
+ center_x (float): x coordinate of the bounding box center.
+ center_y (float): y coordinate of the bounding box center.
+ width (float): Bounding box width.
+ height (float): Bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ center_x (float): x coordinate of the new bounding box center.
+ center_y (float): y coordinate of the new bounding box center.
+ width (float): New bounding box width.
+ height (float): New bounding box height.
+ """
+ keypoints_2d = keypoints_2d.copy()
+ nonleftleg_body_keypoints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16, 17, 18, 22, 23, 24] + [25 + i for i in [0, 1, 2, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]]
+ keypoints_2d[nonleftleg_body_keypoints, :] = 0
+ if keypoints_2d[:, -1].sum() > 1:
+ center, scale = get_bbox(keypoints_2d)
+ center_x = center[0]
+ center_y = center[1]
+ width = 1.1 * scale[0]
+ height = 1.1 * scale[1]
+ return center_x, center_y, width, height
+
+def full_body(keypoints_2d: np.array) -> bool:
+ """
+ Check if all main body joints are visible.
+ Args:
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ bool: True if all main body joints are visible.
+ """
+
+ body_keypoints_openpose = [2, 3, 4, 5, 6, 7, 10, 11, 13, 14]
+ body_keypoints = [25 + i for i in [8, 7, 6, 9, 10, 11, 1, 0, 4, 5]]
+ return (np.maximum(keypoints_2d[body_keypoints, -1], keypoints_2d[body_keypoints_openpose, -1]) > 0).sum() == len(body_keypoints)
+
+def upper_body(keypoints_2d: np.array):
+ """
+ Check if all upper body joints are visible.
+ Args:
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ Returns:
+ bool: True if all main body joints are visible.
+ """
+ lower_body_keypoints_openpose = [10, 11, 13, 14]
+ lower_body_keypoints = [25 + i for i in [1, 0, 4, 5]]
+ upper_body_keypoints_openpose = [0, 1, 15, 16, 17, 18]
+ upper_body_keypoints = [25+8, 25+9, 25+12, 25+13, 25+17, 25+18]
+ return ((keypoints_2d[lower_body_keypoints + lower_body_keypoints_openpose, -1] > 0).sum() == 0)\
+ and ((keypoints_2d[upper_body_keypoints + upper_body_keypoints_openpose, -1] > 0).sum() >= 2)
+
+def get_bbox(keypoints_2d: np.array, rescale: float = 1.2) -> Tuple:
+ """
+ Get center and scale for bounding box from openpose detections.
+ Args:
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ rescale (float): Scale factor to rescale bounding boxes computed from the keypoints.
+ Returns:
+ center (np.array): Array of shape (2,) containing the new bounding box center.
+ scale (float): New bounding box scale.
+ """
+ valid = keypoints_2d[:,-1] > 0
+ valid_keypoints = keypoints_2d[valid][:,:-1]
+ center = 0.5 * (valid_keypoints.max(axis=0) + valid_keypoints.min(axis=0))
+ bbox_size = (valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0))
+ # adjust bounding box tightness
+ scale = bbox_size
+ scale *= rescale
+ return center, scale
+
+def extreme_cropping(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array) -> Tuple:
+ """
+ Perform extreme cropping
+ Args:
+ center_x (float): x coordinate of bounding box center.
+ center_y (float): y coordinate of bounding box center.
+ width (float): bounding box width.
+ height (float): bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ rescale (float): Scale factor to rescale bounding boxes computed from the keypoints.
+ Returns:
+ center_x (float): x coordinate of bounding box center.
+ center_y (float): y coordinate of bounding box center.
+ width (float): bounding box width.
+ height (float): bounding box height.
+ """
+ p = torch.rand(1).item()
+ if full_body(keypoints_2d):
+ if p < 0.7:
+ center_x, center_y, width, height = crop_to_hips(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.9:
+ center_x, center_y, width, height = crop_to_shoulders(center_x, center_y, width, height, keypoints_2d)
+ else:
+ center_x, center_y, width, height = crop_to_head(center_x, center_y, width, height, keypoints_2d)
+ elif upper_body(keypoints_2d):
+ if p < 0.9:
+ center_x, center_y, width, height = crop_to_shoulders(center_x, center_y, width, height, keypoints_2d)
+ else:
+ center_x, center_y, width, height = crop_to_head(center_x, center_y, width, height, keypoints_2d)
+
+ return center_x, center_y, max(width, height), max(width, height)
+
+def extreme_cropping_aggressive(center_x: float, center_y: float, width: float, height: float, keypoints_2d: np.array) -> Tuple:
+ """
+ Perform aggressive extreme cropping
+ Args:
+ center_x (float): x coordinate of bounding box center.
+ center_y (float): y coordinate of bounding box center.
+ width (float): bounding box width.
+ height (float): bounding box height.
+ keypoints_2d (np.array): Array of shape (N, 3) containing 2D keypoint locations.
+ rescale (float): Scale factor to rescale bounding boxes computed from the keypoints.
+ Returns:
+ center_x (float): x coordinate of bounding box center.
+ center_y (float): y coordinate of bounding box center.
+ width (float): bounding box width.
+ height (float): bounding box height.
+ """
+ p = torch.rand(1).item()
+ if full_body(keypoints_2d):
+ if p < 0.2:
+ center_x, center_y, width, height = crop_to_hips(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.3:
+ center_x, center_y, width, height = crop_to_shoulders(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.4:
+ center_x, center_y, width, height = crop_to_head(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.5:
+ center_x, center_y, width, height = crop_torso_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.6:
+ center_x, center_y, width, height = crop_rightarm_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.7:
+ center_x, center_y, width, height = crop_leftarm_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.8:
+ center_x, center_y, width, height = crop_legs_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.9:
+ center_x, center_y, width, height = crop_rightleg_only(center_x, center_y, width, height, keypoints_2d)
+ else:
+ center_x, center_y, width, height = crop_leftleg_only(center_x, center_y, width, height, keypoints_2d)
+ elif upper_body(keypoints_2d):
+ if p < 0.2:
+ center_x, center_y, width, height = crop_to_shoulders(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.4:
+ center_x, center_y, width, height = crop_to_head(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.6:
+ center_x, center_y, width, height = crop_torso_only(center_x, center_y, width, height, keypoints_2d)
+ elif p < 0.8:
+ center_x, center_y, width, height = crop_rightarm_only(center_x, center_y, width, height, keypoints_2d)
+ else:
+ center_x, center_y, width, height = crop_leftarm_only(center_x, center_y, width, height, keypoints_2d)
+ return center_x, center_y, max(width, height), max(width, height)
diff --git a/WiLoR/wilor/datasets/vitdet_dataset.py b/WiLoR/wilor/datasets/vitdet_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..adc1b978498278b24a3a70244fccc2f81ed0d769
--- /dev/null
+++ b/WiLoR/wilor/datasets/vitdet_dataset.py
@@ -0,0 +1,95 @@
+from typing import Dict
+
+import cv2
+import numpy as np
+from skimage.filters import gaussian
+from yacs.config import CfgNode
+import torch
+
+from .utils import (convert_cvimg_to_tensor,
+ expand_to_aspect_ratio,
+ generate_image_patch_cv2)
+
+# ImageNet channel statistics scaled to 0-255 pixel values. Not referenced in
+# the code visible in this file (the dataset reads mean/std from cfg); kept for
+# external callers.
+DEFAULT_MEAN = 255. * np.array([0.485, 0.456, 0.406])
+DEFAULT_STD = 255. * np.array([0.229, 0.224, 0.225])
+
+class ViTDetDataset(torch.utils.data.Dataset):
+    """
+    Inference-only dataset producing one normalized image patch per hand box.
+
+    Each item crops a (possibly blurred and horizontally flipped) square patch
+    around a detected box, converts it to a tensor and normalizes it with the
+    config's mean/std.
+    """
+
+    def __init__(self,
+                 cfg: CfgNode,
+                 img_cv2: np.array,
+                 boxes: np.array,
+                 right: np.array,
+                 rescale_factor: float = 2.5,
+                 train: bool = False,
+                 **kwargs):
+        """
+        Args:
+            cfg (CfgNode): model config; MODEL.IMAGE_SIZE / IMAGE_MEAN / IMAGE_STD are read.
+            img_cv2 (np.array): full image, HxWxC (BGR is assumed by __getitem__ — see there).
+            boxes (np.array): (num_boxes, 4) boxes; (x1, y1, x2, y2) as implied
+                by the center/size arithmetic below.
+            right (np.array): per-box flag; boxes with right == 0 are flipped in __getitem__.
+            rescale_factor (float): box enlargement factor before cropping.
+            train (bool): must be False; this dataset is inference-only.
+        """
+        super().__init__()
+        self.cfg = cfg
+        self.img_cv2 = img_cv2
+        # self.boxes = boxes
+
+        assert train == False, "ViTDetDataset is only for inference"
+        self.train = train
+        self.img_size = cfg.MODEL.IMAGE_SIZE
+        # Mean/std are given in [0, 1] in the config; scale to pixel range.
+        self.mean = 255. * np.array(self.cfg.MODEL.IMAGE_MEAN)
+        self.std = 255. * np.array(self.cfg.MODEL.IMAGE_STD)
+
+        # Preprocess annotations
+        boxes = boxes.astype(np.float32)
+        self.center = (boxes[:, 2:4] + boxes[:, 0:2]) / 2.0
+        # Scale is in the conventional 200-pixel units (bbox_size = scale * 200).
+        self.scale = rescale_factor * (boxes[:, 2:4] - boxes[:, 0:2]) / 200.0
+        self.personid = np.arange(len(boxes), dtype=np.int32)
+        self.right = right.astype(np.float32)
+
+    def __len__(self) -> int:
+        # One item per detected box.
+        return len(self.personid)
+
+    def __getitem__(self, idx: int) -> Dict[str, np.array]:
+        """Crop, optionally flip, resize and normalize the patch for box `idx`."""
+
+        center = self.center[idx].copy()
+        center_x = center[0]
+        center_y = center[1]
+
+        scale = self.scale[idx]
+        BBOX_SHAPE = self.cfg.MODEL.get('BBOX_SHAPE', None)
+        # Expand the box to the model's aspect ratio, then take the larger side.
+        bbox_size = expand_to_aspect_ratio(scale*200, target_aspect_ratio=BBOX_SHAPE).max()
+
+        patch_width = patch_height = self.img_size
+
+        right = self.right[idx].copy()
+        # NOTE(review): patches with right == 0 are mirrored — presumably so
+        # left hands are presented to the model as right hands; confirm with
+        # the model's training convention.
+        flip = right == 0
+
+        # 3. generate image patch
+        # if use_skimage_antialias:
+        cvimg = self.img_cv2.copy()
+        if True:
+            # Blur image to avoid aliasing artifacts
+            downsampling_factor = ((bbox_size*1.0) / patch_width)
+            #print(f'{downsampling_factor=}')
+            downsampling_factor = downsampling_factor / 2.0
+            if downsampling_factor > 1.1:
+                cvimg = gaussian(cvimg, sigma=(downsampling_factor-1)/2, channel_axis=2, preserve_range=True)
+
+
+        img_patch_cv, trans = generate_image_patch_cv2(cvimg,
+                                                       center_x, center_y,
+                                                       bbox_size, bbox_size,
+                                                       patch_width, patch_height,
+                                                       flip, 1.0, 0,
+                                                       border_mode=cv2.BORDER_CONSTANT)
+        # Reverse the channel order (BGR -> RGB, assuming a cv2-loaded image).
+        img_patch_cv = img_patch_cv[:, :, ::-1]
+        img_patch = convert_cvimg_to_tensor(img_patch_cv)
+
+        # apply normalization
+        for n_c in range(min(self.img_cv2.shape[2], 3)):
+            img_patch[n_c, :, :] = (img_patch[n_c, :, :] - self.mean[n_c]) / self.std[n_c]
+
+        item = {
+            'img': img_patch,
+            'personid': int(self.personid[idx]),
+        }
+        item['box_center'] = self.center[idx].copy()
+        item['box_size'] = bbox_size
+        # (width, height) of the full input image.
+        item['img_size'] = 1.0 * np.array([cvimg.shape[1], cvimg.shape[0]])
+        item['right'] = self.right[idx].copy()
+        return item
diff --git a/WiLoR/wilor/models/__init__.py b/WiLoR/wilor/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..aee504cd20e79a255232db620e370fd4280e541b
--- /dev/null
+++ b/WiLoR/wilor/models/__init__.py
@@ -0,0 +1,36 @@
+from .mano_wrapper import MANO
+from .wilor import WiLoR
+
+from .discriminator import Discriminator
+
+def load_wilor(checkpoint_path, cfg_path):
+ from pathlib import Path
+ from wilor.configs import get_config
+ print('Loading ', checkpoint_path)
+ model_cfg = get_config(cfg_path, update_cachedir=True)
+
+ # Override some config values, to crop bbox correctly
+ if ('vit' in model_cfg.MODEL.BACKBONE.TYPE) and ('BBOX_SHAPE' not in model_cfg.MODEL):
+
+ model_cfg.defrost()
+ assert model_cfg.MODEL.IMAGE_SIZE == 256, f"MODEL.IMAGE_SIZE ({model_cfg.MODEL.IMAGE_SIZE}) should be 256 for ViT backbone"
+ model_cfg.MODEL.BBOX_SHAPE = [192,256]
+ model_cfg.freeze()
+
+ # Update config to be compatible with demo
+ if ('PRETRAINED_WEIGHTS' in model_cfg.MODEL.BACKBONE):
+ model_cfg.defrost()
+ model_cfg.MODEL.BACKBONE.pop('PRETRAINED_WEIGHTS')
+ model_cfg.freeze()
+
+ # Update config to be compatible with demo
+
+ if ('DATA_DIR' in model_cfg.MANO):
+ model_cfg.defrost()
+ model_cfg.MANO.DATA_DIR = './mano_data/'
+ model_cfg.MANO.MODEL_PATH = './mano_data/'
+ model_cfg.MANO.MEAN_PARAMS = './mano_data/mano_mean_params.npz'
+ model_cfg.freeze()
+
+ model = WiLoR.load_from_checkpoint(checkpoint_path, strict=False, cfg=model_cfg)
+ return model, model_cfg
diff --git a/WiLoR/wilor/models/__pycache__/__init__.cpython-311.pyc b/WiLoR/wilor/models/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..089b1b2942a6cf3d30dcf03d439e4f03e885bfe7
Binary files /dev/null and b/WiLoR/wilor/models/__pycache__/__init__.cpython-311.pyc differ
diff --git a/WiLoR/wilor/models/__pycache__/discriminator.cpython-311.pyc b/WiLoR/wilor/models/__pycache__/discriminator.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7cc134ded7dbcf33729853f604419725016b74db
Binary files /dev/null and b/WiLoR/wilor/models/__pycache__/discriminator.cpython-311.pyc differ
diff --git a/WiLoR/wilor/models/__pycache__/losses.cpython-311.pyc b/WiLoR/wilor/models/__pycache__/losses.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..daa6050c6928eabd384bf160e066d34014251ec3
Binary files /dev/null and b/WiLoR/wilor/models/__pycache__/losses.cpython-311.pyc differ
diff --git a/WiLoR/wilor/models/__pycache__/mano_wrapper.cpython-311.pyc b/WiLoR/wilor/models/__pycache__/mano_wrapper.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bae9ca2b8190464ee2c9acaf7fbd5de906b9f4bf
Binary files /dev/null and b/WiLoR/wilor/models/__pycache__/mano_wrapper.cpython-311.pyc differ
diff --git a/WiLoR/wilor/models/__pycache__/wilor.cpython-311.pyc b/WiLoR/wilor/models/__pycache__/wilor.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8d538956f5af190996bef8594c8e8955cfe28b9c
Binary files /dev/null and b/WiLoR/wilor/models/__pycache__/wilor.cpython-311.pyc differ
diff --git a/WiLoR/wilor/models/backbones/__init__.py b/WiLoR/wilor/models/backbones/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c429628219083ad80d84874506d6fb34a43ced89
--- /dev/null
+++ b/WiLoR/wilor/models/backbones/__init__.py
@@ -0,0 +1,17 @@
+from .vit import vit
+
+def create_backbone(cfg):
+ if cfg.MODEL.BACKBONE.TYPE == 'vit':
+ return vit(cfg)
+ elif cfg.MODEL.BACKBONE.TYPE == 'fast_vit':
+ import torch
+ import sys
+ from timm.models import create_model
+ #from models.modules.mobileone import reparameterize_model
+ fast_vit = create_model("fastvit_ma36", drop_path_rate=0.2)
+ checkpoint = torch.load('./pretrained_models/fastvit_ma36.pt')
+ fast_vit.load_state_dict(checkpoint['state_dict'])
+ return fast_vit
+
+ else:
+ raise NotImplementedError('Backbone type is not implemented')
diff --git a/WiLoR/wilor/models/backbones/__pycache__/__init__.cpython-310.pyc b/WiLoR/wilor/models/backbones/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ff67ee27204e268f98d1ba36dc85cd8336d95200
Binary files /dev/null and b/WiLoR/wilor/models/backbones/__pycache__/__init__.cpython-310.pyc differ
diff --git a/WiLoR/wilor/models/backbones/__pycache__/__init__.cpython-311.pyc b/WiLoR/wilor/models/backbones/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..be9f541208902d564103158372850f08b19bd9e7
Binary files /dev/null and b/WiLoR/wilor/models/backbones/__pycache__/__init__.cpython-311.pyc differ
diff --git a/WiLoR/wilor/models/backbones/__pycache__/vit.cpython-310.pyc b/WiLoR/wilor/models/backbones/__pycache__/vit.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6258c0505de7405b3e799695133c91028d6aeaca
Binary files /dev/null and b/WiLoR/wilor/models/backbones/__pycache__/vit.cpython-310.pyc differ
diff --git a/WiLoR/wilor/models/backbones/__pycache__/vit.cpython-311.pyc b/WiLoR/wilor/models/backbones/__pycache__/vit.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ce2610beaeacb5e5f92d5f93648f120e8f7f85d8
Binary files /dev/null and b/WiLoR/wilor/models/backbones/__pycache__/vit.cpython-311.pyc differ
diff --git a/WiLoR/wilor/models/backbones/vit.py b/WiLoR/wilor/models/backbones/vit.py
new file mode 100644
index 0000000000000000000000000000000000000000..89f96f9e7ff91e75a3e9088a8bf11a731962ebca
--- /dev/null
+++ b/WiLoR/wilor/models/backbones/vit.py
@@ -0,0 +1,410 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+import numpy as np
+import torch
+from functools import partial
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+from ...utils.geometry import rot6d_to_rotmat, aa_to_rotmat
+from timm.models.layers import drop_path, to_2tuple, trunc_normal_
+
+def vit(cfg):
+ return ViT(
+ img_size=(256, 192),
+ patch_size=16,
+ embed_dim=1280,
+ depth=32,
+ num_heads=16,
+ ratio=1,
+ use_checkpoint=False,
+ mlp_ratio=4,
+ qkv_bias=True,
+ drop_path_rate=0.55,
+ cfg = cfg
+ )
+
+def get_abs_pos(abs_pos, h, w, ori_h, ori_w, has_cls_token=True):
+ """
+ Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token
+ dimension for the original embeddings.
+ Args:
+ abs_pos (Tensor): absolute positional embeddings with (1, num_position, C).
+ has_cls_token (bool): If true, has 1 embedding in abs_pos for cls token.
+ hw (Tuple): size of input image tokens.
+
+ Returns:
+ Absolute positional embeddings after processing with shape (1, H, W, C)
+ """
+ cls_token = None
+ B, L, C = abs_pos.shape
+ if has_cls_token:
+ cls_token = abs_pos[:, 0:1]
+ abs_pos = abs_pos[:, 1:]
+
+ if ori_h != h or ori_w != w:
+ new_abs_pos = F.interpolate(
+ abs_pos.reshape(1, ori_h, ori_w, -1).permute(0, 3, 1, 2),
+ size=(h, w),
+ mode="bicubic",
+ align_corners=False,
+ ).permute(0, 2, 3, 1).reshape(B, -1, C)
+
+ else:
+ new_abs_pos = abs_pos
+
+ if cls_token is not None:
+ new_abs_pos = torch.cat([cls_token, new_abs_pos], dim=1)
+ return new_abs_pos
+
+class DropPath(nn.Module):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ """
+ def __init__(self, drop_prob=None):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+ def extra_repr(self):
+ return 'p={}'.format(self.drop_prob)
+
+class Mlp(nn.Module):
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features)
+ self.drop = nn.Dropout(drop)
+
+ def forward(self, x):
+ x = self.fc1(x)
+ x = self.act(x)
+ x = self.fc2(x)
+ x = self.drop(x)
+ return x
+
+class Attention(nn.Module):
+ def __init__(
+ self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
+ proj_drop=0., attn_head_dim=None,):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+ self.dim = dim
+
+ if attn_head_dim is not None:
+ head_dim = attn_head_dim
+ all_head_dim = head_dim * self.num_heads
+
+ self.scale = qk_scale or head_dim ** -0.5
+
+ self.qkv = nn.Linear(dim, all_head_dim * 3, bias=qkv_bias)
+
+ self.attn_drop = nn.Dropout(attn_drop)
+ self.proj = nn.Linear(all_head_dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+
+ def forward(self, x):
+ B, N, C = x.shape
+ qkv = self.qkv(x)
+ qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
+ q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
+
+ q = q * self.scale
+ attn = (q @ k.transpose(-2, -1))
+
+ attn = attn.softmax(dim=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+
+ return x
+
+class Block(nn.Module):
+
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None,
+ drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
+ norm_layer=nn.LayerNorm, attn_head_dim=None
+ ):
+ super().__init__()
+
+ self.norm1 = norm_layer(dim)
+ self.attn = Attention(
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+ attn_drop=attn_drop, proj_drop=drop, attn_head_dim=attn_head_dim
+ )
+
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+ self.norm2 = norm_layer(dim)
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+
+ def forward(self, x):
+ x = x + self.drop_path(self.attn(self.norm1(x)))
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+ return x
+
+
+class PatchEmbed(nn.Module):
+ """ Image to Patch Embedding
+ """
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, ratio=1):
+ super().__init__()
+ img_size = to_2tuple(img_size)
+ patch_size = to_2tuple(patch_size)
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) * (ratio ** 2)
+ self.patch_shape = (int(img_size[0] // patch_size[0] * ratio), int(img_size[1] // patch_size[1] * ratio))
+ self.origin_patch_shape = (int(img_size[0] // patch_size[0]), int(img_size[1] // patch_size[1]))
+ self.img_size = img_size
+ self.patch_size = patch_size
+ self.num_patches = num_patches
+
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=(patch_size[0] // ratio), padding=4 + 2 * (ratio//2-1))
+
+ def forward(self, x, **kwargs):
+ B, C, H, W = x.shape
+ x = self.proj(x)
+ Hp, Wp = x.shape[2], x.shape[3]
+
+ x = x.flatten(2).transpose(1, 2)
+ return x, (Hp, Wp)
+
+
+class HybridEmbed(nn.Module):
+ """ CNN Feature Map Embedding
+ Extract feature map from CNN, flatten, project to embedding dim.
+ """
+ def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768):
+ super().__init__()
+ assert isinstance(backbone, nn.Module)
+ img_size = to_2tuple(img_size)
+ self.img_size = img_size
+ self.backbone = backbone
+ if feature_size is None:
+ with torch.no_grad():
+ training = backbone.training
+ if training:
+ backbone.eval()
+ o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1]
+ feature_size = o.shape[-2:]
+ feature_dim = o.shape[1]
+ backbone.train(training)
+ else:
+ feature_size = to_2tuple(feature_size)
+ feature_dim = self.backbone.feature_info.channels()[-1]
+ self.num_patches = feature_size[0] * feature_size[1]
+ self.proj = nn.Linear(feature_dim, embed_dim)
+
+ def forward(self, x):
+ x = self.backbone(x)[-1]
+ x = x.flatten(2).transpose(1, 2)
+ x = self.proj(x)
+ return x
+
+
class ViT(nn.Module):
    # Vision Transformer backbone extended with MANO parameter tokens: learned
    # pose/shape/camera tokens (initialized from the mean MANO parameters) are
    # prepended to the patch-token sequence, refined by the transformer blocks,
    # and decoded into hand-model parameter residuals.

    def __init__(self,
                 img_size=224, patch_size=16, in_chans=3, num_classes=80, embed_dim=768, depth=12,
                 num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0.,
                 drop_path_rate=0., hybrid_backbone=None, norm_layer=None, use_checkpoint=False,
                 frozen_stages=-1, ratio=1, last_norm=True,
                 patch_padding='pad', freeze_attn=False, freeze_ffn=False, cfg=None,
                 ):
        # Protect mutable default arguments
        super(ViT, self).__init__()
        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
        self.frozen_stages = frozen_stages
        self.use_checkpoint = use_checkpoint
        self.patch_padding = patch_padding
        self.freeze_attn = freeze_attn
        self.freeze_ffn = freeze_ffn
        self.depth = depth

        if hybrid_backbone is not None:
            self.patch_embed = HybridEmbed(
                hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim)
        else:
            self.patch_embed = PatchEmbed(
                img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, ratio=ratio)
        num_patches = self.patch_embed.num_patches

        ##########################################
        # MANO-specific token heads.
        self.cfg = cfg
        self.joint_rep_type = cfg.MODEL.MANO_HEAD.get('JOINT_REP', '6d')
        # 6D continuous rotation representation or axis-angle.
        self.joint_rep_dim = {'6d': 6, 'aa': 3}[self.joint_rep_type]
        npose = self.joint_rep_dim * (cfg.MANO.NUM_HAND_JOINTS + 1)  # +1 for the global orientation
        self.npose = npose
        # Mean MANO parameters: used both to build the initial tokens and as
        # the residual base for the decoded predictions.
        mean_params = np.load(cfg.MANO.MEAN_PARAMS)
        init_cam = torch.from_numpy(mean_params['cam'].astype(np.float32)).unsqueeze(0)
        self.register_buffer('init_cam', init_cam)
        init_hand_pose = torch.from_numpy(mean_params['pose'].astype(np.float32)).unsqueeze(0)
        init_betas = torch.from_numpy(mean_params['shape'].astype('float32')).unsqueeze(0)
        self.register_buffer('init_hand_pose', init_hand_pose)
        self.register_buffer('init_betas', init_betas)

        # Embed the mean parameters into token space ...
        self.pose_emb = nn.Linear(self.joint_rep_dim, embed_dim)
        self.shape_emb = nn.Linear(10, embed_dim)
        self.cam_emb = nn.Linear(3, embed_dim)

        # ... and decode the refined tokens back into parameter residuals.
        # NOTE(review): decpose outputs 6 per joint token, which matches the
        # '6d' representation only -- confirm 'aa' is unused with this decoder.
        self.decpose = nn.Linear(self.num_features, 6)
        self.decshape = nn.Linear(self.num_features, 10)
        self.deccam = nn.Linear(self.num_features, 3)
        if cfg.MODEL.MANO_HEAD.get('INIT_DECODER_XAVIER', False):
            # True by default in MLP. False by default in Transformer
            nn.init.xavier_uniform_(self.decpose.weight, gain=0.01)
            nn.init.xavier_uniform_(self.decshape.weight, gain=0.01)
            nn.init.xavier_uniform_(self.deccam.weight, gain=0.01)

        ##########################################

        # since the pretraining model has class token
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))

        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]  # stochastic depth decay rule

        self.blocks = nn.ModuleList([
            Block(
                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer,
            )
            for i in range(depth)])

        self.last_norm = norm_layer(embed_dim) if last_norm else nn.Identity()

        if self.pos_embed is not None:
            trunc_normal_(self.pos_embed, std=.02)

        self._freeze_stages()

    def _freeze_stages(self):
        """Freeze parameters."""
        if self.frozen_stages >= 0:
            self.patch_embed.eval()
            for param in self.patch_embed.parameters():
                param.requires_grad = False

        # NOTE(review): indices start at 1, so blocks[0] is never frozen by
        # frozen_stages alone -- confirm this is intended.
        for i in range(1, self.frozen_stages + 1):
            m = self.blocks[i]
            m.eval()
            for param in m.parameters():
                param.requires_grad = False

        if self.freeze_attn:
            # Freeze the attention sub-layer (and its norm) of every block.
            for i in range(0, self.depth):
                m = self.blocks[i]
                m.attn.eval()
                m.norm1.eval()
                for param in m.attn.parameters():
                    param.requires_grad = False
                for param in m.norm1.parameters():
                    param.requires_grad = False

        if self.freeze_ffn:
            # Freeze patch/pos embeddings and the MLP sub-layer of every block.
            self.pos_embed.requires_grad = False
            self.patch_embed.eval()
            for param in self.patch_embed.parameters():
                param.requires_grad = False
            for i in range(0, self.depth):
                m = self.blocks[i]
                m.mlp.eval()
                m.norm2.eval()
                for param in m.mlp.parameters():
                    param.requires_grad = False
                for param in m.norm2.parameters():
                    param.requires_grad = False

    def init_weights(self):
        """Initialize the weights in backbone.
        Args:
            pretrained (str, optional): Path to pre-trained weights.
            Defaults to None.
        """
        def _init_weights(m):
            # Truncated-normal init for linear layers, unit/zero for LayerNorm.
            if isinstance(m, nn.Linear):
                trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.LayerNorm):
                nn.init.constant_(m.bias, 0)
                nn.init.constant_(m.weight, 1.0)

        self.apply(_init_weights)

    def get_num_layers(self):
        # Number of transformer blocks.
        return len(self.blocks)

    @torch.jit.ignore
    def no_weight_decay(self):
        # Parameter names excluded from weight decay by the optimizer setup.
        return {'pos_embed', 'cls_token'}

    def forward_features(self, x):
        """Run the transformer and decode MANO parameters.

        Args:
            x: Image batch of shape (B, C, H, W).
        Returns:
            Tuple of (pred_mano_params dict with rotation-matrix poses,
            pred_cam of shape (B, 3), pred_mano_feats dict with the raw
            parameter vectors, and the patch feature map of shape
            (B, embed_dim, Hp, Wp)).
        """
        B, C, H, W = x.shape
        x, (Hp, Wp) = self.patch_embed(x)

        if self.pos_embed is not None:
            # fit for multiple GPU training
            # since the first element for pos embed (sin-cos manner) is zero, it will cause no difference
            x = x + self.pos_embed[:, 1:] + self.pos_embed[:, :1]
        # Prepend the mean pose / shape / camera parameters as extra tokens so
        # the transformer refines them alongside the image patches.
        pose_tokens = self.pose_emb(self.init_hand_pose.reshape(1, self.cfg.MANO.NUM_HAND_JOINTS + 1, self.joint_rep_dim)).repeat(B, 1, 1)
        shape_tokens = self.shape_emb(self.init_betas).unsqueeze(1).repeat(B, 1, 1)
        cam_tokens = self.cam_emb(self.init_cam).unsqueeze(1).repeat(B, 1, 1)

        x = torch.cat([pose_tokens, shape_tokens, cam_tokens, x], 1)
        for blk in self.blocks:
            if self.use_checkpoint:
                # Gradient checkpointing: trade compute for memory.
                x = checkpoint.checkpoint(blk, x)
            else:
                x = blk(x)

        x = self.last_norm(x)

        # Split the output sequence back into parameter tokens / patch tokens.
        pose_feat = x[:, :(self.cfg.MANO.NUM_HAND_JOINTS + 1)]
        shape_feat = x[:, (self.cfg.MANO.NUM_HAND_JOINTS + 1):1 + (self.cfg.MANO.NUM_HAND_JOINTS + 1)]
        cam_feat = x[:, 1 + (self.cfg.MANO.NUM_HAND_JOINTS + 1):2 + (self.cfg.MANO.NUM_HAND_JOINTS + 1)]

        # Decode residuals and add the mean parameters back.
        pred_hand_pose = self.decpose(pose_feat).reshape(B, -1) + self.init_hand_pose  # B, 96
        pred_betas = self.decshape(shape_feat).reshape(B, -1) + self.init_betas  # B, 10
        pred_cam = self.deccam(cam_feat).reshape(B, -1) + self.init_cam  # B, 3

        # Raw parameter vectors, reused by RefineNet as a residual base.
        pred_mano_feats = {}
        pred_mano_feats['hand_pose'] = pred_hand_pose
        pred_mano_feats['betas'] = pred_betas
        pred_mano_feats['cam'] = pred_cam

        # Convert the chosen rotation representation to rotation matrices.
        joint_conversion_fn = {
            '6d': rot6d_to_rotmat,
            'aa': lambda x: aa_to_rotmat(x.view(-1, 3).contiguous())
        }[self.joint_rep_type]

        pred_hand_pose = joint_conversion_fn(pred_hand_pose).view(B, self.cfg.MANO.NUM_HAND_JOINTS + 1, 3, 3)
        pred_mano_params = {'global_orient': pred_hand_pose[:, [0]],
                            'hand_pose': pred_hand_pose[:, 1:],
                            'betas': pred_betas}

        # Remaining tokens form the spatial feature map consumed by RefineNet.
        img_feat = x[:, 2 + (self.cfg.MANO.NUM_HAND_JOINTS + 1):].reshape(B, Hp, Wp, -1).permute(0, 3, 1, 2)
        return pred_mano_params, pred_cam, pred_mano_feats, img_feat

    def forward(self, x):
        x = self.forward_features(x)
        return x

    def train(self, mode=True):
        """Convert the model into training mode while keeping frozen stages in eval."""
        super().train(mode)
        self._freeze_stages()
diff --git a/WiLoR/wilor/models/discriminator.py b/WiLoR/wilor/models/discriminator.py
new file mode 100644
index 0000000000000000000000000000000000000000..717cdc5744c3611bae61c6ff5c8b844880e4083a
--- /dev/null
+++ b/WiLoR/wilor/models/discriminator.py
@@ -0,0 +1,98 @@
+import torch
+import torch.nn as nn
+
class Discriminator(nn.Module):

    def __init__(self):
        """
        Pose + Shape discriminator proposed in HMR, adapted to MANO hands.

        Three sub-discriminators: one logit per hand joint (shared per-joint
        feature extractor), one logit for the shape (betas) vector, and one
        logit for the full concatenated pose.
        """
        super(Discriminator, self).__init__()

        self.num_joints = 15  # MANO hand joints, excluding the global orientation
        # poses_alone: 1x1 convs act as a shared per-joint MLP over the 9
        # entries of each joint's flattened rotation matrix.
        self.D_conv1 = nn.Conv2d(9, 32, kernel_size=1)
        nn.init.xavier_uniform_(self.D_conv1.weight)
        nn.init.zeros_(self.D_conv1.bias)
        self.relu = nn.ReLU(inplace=True)
        self.D_conv2 = nn.Conv2d(32, 32, kernel_size=1)
        nn.init.xavier_uniform_(self.D_conv2.weight)
        nn.init.zeros_(self.D_conv2.bias)
        # One independent linear classifier per joint.
        pose_out = []
        for i in range(self.num_joints):
            pose_out_temp = nn.Linear(32, 1)
            nn.init.xavier_uniform_(pose_out_temp.weight)
            nn.init.zeros_(pose_out_temp.bias)
            pose_out.append(pose_out_temp)
        self.pose_out = nn.ModuleList(pose_out)

        # betas: small MLP on the 10 shape coefficients.
        self.betas_fc1 = nn.Linear(10, 10)
        nn.init.xavier_uniform_(self.betas_fc1.weight)
        nn.init.zeros_(self.betas_fc1.bias)
        self.betas_fc2 = nn.Linear(10, 5)
        nn.init.xavier_uniform_(self.betas_fc2.weight)
        nn.init.zeros_(self.betas_fc2.bias)
        self.betas_out = nn.Linear(5, 1)
        nn.init.xavier_uniform_(self.betas_out.weight)
        nn.init.zeros_(self.betas_out.bias)

        # poses_joint: MLP over the concatenation of all per-joint features.
        self.D_alljoints_fc1 = nn.Linear(32 * self.num_joints, 1024)
        nn.init.xavier_uniform_(self.D_alljoints_fc1.weight)
        nn.init.zeros_(self.D_alljoints_fc1.bias)
        self.D_alljoints_fc2 = nn.Linear(1024, 1024)
        nn.init.xavier_uniform_(self.D_alljoints_fc2.weight)
        nn.init.zeros_(self.D_alljoints_fc2.bias)
        self.D_alljoints_out = nn.Linear(1024, 1)
        nn.init.xavier_uniform_(self.D_alljoints_out.weight)
        nn.init.zeros_(self.D_alljoints_out.bias)

    def forward(self, poses: torch.Tensor, betas: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the discriminator.
        Args:
            poses (torch.Tensor): Tensor reshapeable to (B, 15, 1, 9), e.g. (B, 15, 3, 3), containing a batch of MANO hand poses (excluding the global orientation).
            betas (torch.Tensor): Tensor of shape (B, 10) containing a batch of MANO beta coefficients.
        Returns:
            torch.Tensor: Discriminator output with shape (B, 17): one logit per joint (15), one for the betas, one for the whole pose.
        """
        # poses B x num_joints x 1 x 9
        poses = poses.reshape(-1, self.num_joints, 1, 9)
        bn = poses.shape[0]
        # poses B x 9 x num_joints x 1
        poses = poses.permute(0, 3, 1, 2).contiguous()

        # poses_alone: shared per-joint feature extractor.
        poses = self.D_conv1(poses)
        poses = self.relu(poses)
        poses = self.D_conv2(poses)
        poses = self.relu(poses)

        # Per-joint logits.
        poses_out = []
        for i in range(self.num_joints):
            poses_out_ = self.pose_out[i](poses[:, :, i, 0])
            poses_out.append(poses_out_)
        poses_out = torch.cat(poses_out, dim=1)

        # betas logit.
        betas = self.betas_fc1(betas)
        betas = self.relu(betas)
        betas = self.betas_fc2(betas)
        betas = self.relu(betas)
        betas_out = self.betas_out(betas)

        # poses_joint: single logit from all joint features at once.
        poses = poses.reshape(bn, -1)
        poses_all = self.D_alljoints_fc1(poses)
        poses_all = self.relu(poses_all)
        poses_all = self.D_alljoints_fc2(poses_all)
        poses_all = self.relu(poses_all)
        poses_all_out = self.D_alljoints_out(poses_all)

        disc_out = torch.cat((poses_out, betas_out, poses_all_out), 1)
        return disc_out
diff --git a/WiLoR/wilor/models/heads/__init__.py b/WiLoR/wilor/models/heads/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..40279d65814d344cd6a6f452356c4ac4e6a633b3
--- /dev/null
+++ b/WiLoR/wilor/models/heads/__init__.py
@@ -0,0 +1 @@
+from .refinement_net import RefineNet
\ No newline at end of file
diff --git a/WiLoR/wilor/models/heads/__pycache__/__init__.cpython-310.pyc b/WiLoR/wilor/models/heads/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b92d73420fca19797b04781dcf502c75195a32e
Binary files /dev/null and b/WiLoR/wilor/models/heads/__pycache__/__init__.cpython-310.pyc differ
diff --git a/WiLoR/wilor/models/heads/__pycache__/__init__.cpython-311.pyc b/WiLoR/wilor/models/heads/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7ad97e8fbfd3ddc02753f830f0bfe744a128ab52
Binary files /dev/null and b/WiLoR/wilor/models/heads/__pycache__/__init__.cpython-311.pyc differ
diff --git a/WiLoR/wilor/models/heads/__pycache__/refinement_net.cpython-310.pyc b/WiLoR/wilor/models/heads/__pycache__/refinement_net.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ac330d451452f2d54170eac2743682dbff0bcd3f
Binary files /dev/null and b/WiLoR/wilor/models/heads/__pycache__/refinement_net.cpython-310.pyc differ
diff --git a/WiLoR/wilor/models/heads/__pycache__/refinement_net.cpython-311.pyc b/WiLoR/wilor/models/heads/__pycache__/refinement_net.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8be02a0a3336bfc41fb346c0aea1b040d6b5a0ca
Binary files /dev/null and b/WiLoR/wilor/models/heads/__pycache__/refinement_net.cpython-311.pyc differ
diff --git a/WiLoR/wilor/models/heads/refinement_net.py b/WiLoR/wilor/models/heads/refinement_net.py
new file mode 100644
index 0000000000000000000000000000000000000000..d034aa90e59327c3f002c0a77f0cbb49896d8afa
--- /dev/null
+++ b/WiLoR/wilor/models/heads/refinement_net.py
@@ -0,0 +1,204 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+from ...utils.geometry import rot6d_to_rotmat, aa_to_rotmat
+from typing import Optional
+
def make_linear_layers(feat_dims, relu_final=True, use_bn=False):
    """Build an MLP as a Sequential of Linear (+ optional BatchNorm1d) + ReLU.

    Args:
        feat_dims: Sequence of layer widths; one Linear per adjacent pair.
        relu_final: If False, the last Linear gets no activation (raw output).
        use_bn: Insert BatchNorm1d before each ReLU.
    Returns:
        nn.Sequential implementing the MLP.
    """
    layers = []
    last = len(feat_dims) - 2
    for i, (d_in, d_out) in enumerate(zip(feat_dims[:-1], feat_dims[1:])):
        layers.append(nn.Linear(d_in, d_out))
        # Skip the activation after the final layer unless requested.
        if i < last or relu_final:
            if use_bn:
                layers.append(nn.BatchNorm1d(d_out))
            layers.append(nn.ReLU(inplace=True))
    return nn.Sequential(*layers)
+
def make_conv_layers(feat_dims, kernel=3, stride=1, padding=1, bnrelu_final=True):
    """Build a stack of Conv2d + BatchNorm2d + ReLU layers.

    Args:
        feat_dims: Sequence of channel counts; one Conv2d per adjacent pair.
        kernel, stride, padding: Shared Conv2d hyper-parameters.
        bnrelu_final: If False, the last conv gets no BN/ReLU (raw output).
    Returns:
        nn.Sequential implementing the conv stack.
    """
    layers = []
    n_convs = len(feat_dims) - 1
    for i in range(n_convs):
        layers.append(nn.Conv2d(
            in_channels=feat_dims[i],
            out_channels=feat_dims[i + 1],
            kernel_size=kernel,
            stride=stride,
            padding=padding,
        ))
        # Skip BN/ReLU after the final conv unless requested.
        if i < n_convs - 1 or bnrelu_final:
            layers.append(nn.BatchNorm2d(feat_dims[i + 1]))
            layers.append(nn.ReLU(inplace=True))
    return nn.Sequential(*layers)
+
def make_deconv_layers(feat_dims, bnrelu_final=True):
    """Build a stack of 2x-upsampling ConvTranspose2d + BatchNorm2d + ReLU.

    Each transposed conv uses kernel 4 / stride 2 / padding 1, which exactly
    doubles the spatial resolution.

    Args:
        feat_dims: Sequence of channel counts; one deconv per adjacent pair.
        bnrelu_final: If False, the last deconv gets no BN/ReLU (raw output).
    Returns:
        nn.Sequential implementing the deconv stack.
    """
    layers = []
    n_deconvs = len(feat_dims) - 1
    for i in range(n_deconvs):
        layers.append(nn.ConvTranspose2d(
            in_channels=feat_dims[i],
            out_channels=feat_dims[i + 1],
            kernel_size=4,
            stride=2,
            padding=1,
            output_padding=0,
            bias=False,
        ))
        # Skip BN/ReLU after the final deconv unless requested.
        if i < n_deconvs - 1 or bnrelu_final:
            layers.append(nn.BatchNorm2d(feat_dims[i + 1]))
            layers.append(nn.ReLU(inplace=True))
    return nn.Sequential(*layers)
+
def sample_joint_features(img_feat, joint_xy):
    """Bilinearly sample per-joint feature vectors from a feature map.

    Args:
        img_feat: Feature map of shape (B, C, H, W).
        joint_xy: Pixel coordinates of shape (B, J, 2), (x, y) order.
    Returns:
        Tensor of shape (B, J, C) with one feature vector per joint.
    """
    height, width = img_feat.shape[2:]
    # Normalize pixel coordinates to [-1, 1] as required by grid_sample
    # (align_corners=True maps 0 -> -1 and size-1 -> +1).
    norm_x = joint_xy[..., 0] / (width - 1) * 2 - 1
    norm_y = joint_xy[..., 1] / (height - 1) * 2 - 1
    grid = torch.stack((norm_x, norm_y), dim=2).unsqueeze(2)  # (B, J, 1, 2)
    sampled = F.grid_sample(img_feat, grid, align_corners=True)[..., 0]  # (B, C, J)
    return sampled.permute(0, 2, 1).contiguous()  # (B, J, C)
+
def perspective_projection(points: torch.Tensor,
                           translation: torch.Tensor,
                           focal_length: torch.Tensor,
                           camera_center: Optional[torch.Tensor] = None,
                           rotation: Optional[torch.Tensor] = None) -> torch.Tensor:
    """
    Computes the perspective projection of a set of 3D points.
    Args:
        points (torch.Tensor): Tensor of shape (B, N, 3) containing the input 3D points.
        translation (torch.Tensor): Tensor of shape (B, 3) containing the 3D camera translation.
        focal_length (torch.Tensor): Tensor of shape (B, 2) containing the focal length in pixels.
        camera_center (torch.Tensor): Tensor of shape (B, 2) containing the camera center in pixels (defaults to 0).
        rotation (torch.Tensor): Tensor of shape (B, 3, 3) containing the camera rotation (defaults to identity).
    Returns:
        torch.Tensor: Tensor of shape (B, N, 2) containing the projection of the input points.
    """
    batch_size = points.shape[0]
    device, dtype = points.device, points.dtype
    if rotation is None:
        rotation = torch.eye(3, device=device, dtype=dtype).unsqueeze(0).expand(batch_size, -1, -1)
    if camera_center is None:
        camera_center = torch.zeros(batch_size, 2, device=device, dtype=dtype)

    # Intrinsic camera matrix K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].
    K = torch.zeros(batch_size, 3, 3, device=device, dtype=dtype)
    K[:, 0, 0] = focal_length[:, 0]
    K[:, 1, 1] = focal_length[:, 1]
    K[:, 2, 2] = 1.0
    K[:, :2, 2] = camera_center

    # Rigid transform into the camera frame.
    cam_points = torch.einsum('bij,bkj->bki', rotation, points) + translation.unsqueeze(1)

    # Perspective divide by depth, then apply intrinsics.
    projected = cam_points / cam_points[:, :, -1:]
    projected = torch.einsum('bij,bkj->bki', K, projected)

    # Drop the homogeneous coordinate.
    return projected[:, :, :2]
+
class DeConvNet(nn.Module):
    """Build a multi-scale feature pyramid by progressively deconvolving
    (upsampling) a single backbone feature map.

    Level i upsamples the base map by 2**(i+1); the base (post 1x1 conv) map
    is also kept, and the list is returned high-resolution first.
    """

    def __init__(self, feat_dim=768, upscale=4):
        super().__init__()
        # 1x1 conv halves the channel count before upsampling.
        self.first_conv = make_conv_layers([feat_dim, feat_dim // 2],
                                           kernel=1, stride=1, padding=0, bnrelu_final=False)
        # Channel chains for each supported pyramid level.
        channel_chains = {
            0: [feat_dim // 2, feat_dim // 4],
            1: [feat_dim // 2, feat_dim // 4, feat_dim // 8],
            2: [feat_dim // 2, feat_dim // 4, feat_dim // 8, feat_dim // 8],
        }
        self.deconv = nn.ModuleList()
        for level in range(int(math.log2(upscale)) + 1):
            chain = channel_chains.get(level)
            if chain is not None:  # levels beyond 2 are not supported and are skipped
                self.deconv.append(make_deconv_layers(chain))

    def forward(self, img_feat):
        base = self.first_conv(img_feat)
        pyramid = [base]
        # Each branch upsamples the same base map to its own resolution.
        for branch in self.deconv:
            pyramid.append(branch(base))
        return pyramid[::-1]  # high resolution -> low resolution
+
class DeConvNet_v2(nn.Module):
    """Single-branch variant of DeConvNet: one 1x1 conv followed by a single
    stride-4 transposed conv, returning a one-element feature list."""

    def __init__(self, feat_dim=768):
        super().__init__()
        # 1x1 conv halves the channel count.
        self.first_conv = make_conv_layers([feat_dim, feat_dim // 2],
                                           kernel=1, stride=1, padding=0, bnrelu_final=False)
        # One transposed conv upsamples 4x and halves channels again.
        self.deconv = nn.Sequential(
            nn.ConvTranspose2d(in_channels=feat_dim // 2, out_channels=feat_dim // 4,
                               kernel_size=4, stride=4, padding=0, output_padding=0, bias=False),
            nn.BatchNorm2d(feat_dim // 4),
            nn.ReLU(inplace=True),
        )

    def forward(self, img_feat):
        # Returned as a list to mirror DeConvNet's multi-scale interface.
        return [self.deconv(self.first_conv(img_feat))]
+
class RefineNet(nn.Module):
    # Refinement head: deconvolves the ViT feature map into a multi-scale
    # pyramid, samples per-vertex features at the projected locations of a
    # preliminary MANO mesh, and regresses residual pose/shape/camera updates.
    def __init__(self, cfg, feat_dim=1280, upscale=3):
        super(RefineNet, self).__init__()
        # self.deconv = DeConvNet_v2(feat_dim=feat_dim)
        # self.out_dim = feat_dim//4

        self.deconv = DeConvNet(feat_dim=feat_dim, upscale=upscale)
        # Concatenated channel dims of the pyramid levels produced by DeConvNet.
        self.out_dim = feat_dim//8 + feat_dim//4 + feat_dim//2
        # NOTE(review): 96 = 16 joints x 6 (6D rotation) -- assumes
        # NUM_HAND_JOINTS == 15 and the '6d' representation; confirm.
        self.dec_pose = nn.Linear(self.out_dim, 96)
        self.dec_cam = nn.Linear(self.out_dim, 3)
        self.dec_shape = nn.Linear(self.out_dim, 10)

        self.cfg = cfg
        self.joint_rep_type = cfg.MODEL.MANO_HEAD.get('JOINT_REP', '6d')
        self.joint_rep_dim = {'6d': 6, 'aa': 3}[self.joint_rep_type]

    def forward(self, img_feat, verts_3d, pred_cam, pred_mano_feats, focal_length):
        """Refine the initial MANO estimate with mesh-aligned image features.

        Args:
            img_feat: Backbone feature map of shape (B, feat_dim, H, W).
            verts_3d: Preliminary MANO mesh vertices (B, V, 3).
            pred_cam: Initial weak-perspective camera (B, 3) as (s, tx, ty).
            pred_mano_feats: Dict with raw 'hand_pose', 'betas', 'cam' vectors
                from the backbone, used as the residual base.
            focal_length: Focal length in pixels, shape (B, 2).
        Returns:
            Tuple of (pred_mano_params dict with rotation-matrix poses,
            refined pred_cam of shape (B, 3)).
        """
        B = img_feat.shape[0]

        img_feats = self.deconv(img_feat)

        img_feat_sizes = [img_feat.shape[2] for img_feat in img_feats]

        # Weak-perspective -> perspective translation, one per pyramid scale
        # (1e-9 guards against division by zero for degenerate scale s).
        temp_cams = [torch.stack([pred_cam[:, 1], pred_cam[:, 2],
                                  2*focal_length[:, 0]/(img_feat_size * pred_cam[:, 0] +1e-9)],dim=-1) for img_feat_size in img_feat_sizes]

        # Project the preliminary mesh onto each feature-map resolution.
        verts_2d = [perspective_projection(verts_3d,
                                           translation=temp_cams[i],
                                           focal_length=focal_length / img_feat_sizes[i]) for i in range(len(img_feat_sizes))]

        # Max-pool the sampled per-vertex features over the vertex dimension.
        vert_feats = [sample_joint_features(img_feats[i], verts_2d[i]).max(1).values for i in range(len(img_feat_sizes))]

        vert_feats = torch.cat(vert_feats, dim=-1)

        # Decode residual updates on top of the backbone's raw predictions.
        delta_pose = self.dec_pose(vert_feats)
        delta_betas = self.dec_shape(vert_feats)
        delta_cam = self.dec_cam(vert_feats)

        pred_hand_pose = pred_mano_feats['hand_pose'] + delta_pose
        pred_betas = pred_mano_feats['betas'] + delta_betas
        pred_cam = pred_mano_feats['cam'] + delta_cam

        # Convert the chosen rotation representation to rotation matrices.
        joint_conversion_fn = {
            '6d': rot6d_to_rotmat,
            'aa': lambda x: aa_to_rotmat(x.view(-1, 3).contiguous())
        }[self.joint_rep_type]

        pred_hand_pose = joint_conversion_fn(pred_hand_pose).view(B, self.cfg.MANO.NUM_HAND_JOINTS+1, 3, 3)

        pred_mano_params = {'global_orient': pred_hand_pose[:, [0]],
                            'hand_pose': pred_hand_pose[:, 1:],
                            'betas': pred_betas}

        return pred_mano_params, pred_cam
+
+
\ No newline at end of file
diff --git a/WiLoR/wilor/models/losses.py b/WiLoR/wilor/models/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..1dbd27139c8610dffcf229aa3d1b6943547b5249
--- /dev/null
+++ b/WiLoR/wilor/models/losses.py
@@ -0,0 +1,92 @@
+import torch
+import torch.nn as nn
+
class Keypoint2DLoss(nn.Module):

    def __init__(self, loss_type: str = 'l1'):
        """
        2D keypoint loss module.
        Args:
            loss_type (str): Choose between l1 and l2 losses.
        """
        super().__init__()
        loss_classes = {'l1': nn.L1Loss, 'l2': nn.MSELoss}
        if loss_type not in loss_classes:
            raise NotImplementedError('Unsupported loss function')
        # Elementwise loss; confidence weighting and reduction happen in forward.
        self.loss_fn = loss_classes[loss_type](reduction='none')

    def forward(self, pred_keypoints_2d: torch.Tensor, gt_keypoints_2d: torch.Tensor) -> torch.Tensor:
        """
        Compute 2D reprojection loss on the keypoints.
        Args:
            pred_keypoints_2d (torch.Tensor): Tensor of shape [B, S, N, 2] containing projected 2D keypoints (B: batch_size, S: num_samples, N: num_keypoints)
            gt_keypoints_2d (torch.Tensor): Tensor of shape [B, S, N, 3] containing the ground truth 2D keypoints and confidence.
        Returns:
            torch.Tensor: 2D keypoint loss.
        """
        # Last channel of the ground truth is a per-keypoint confidence weight.
        conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
        per_element = self.loss_fn(pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])
        return (conf * per_element).sum(dim=(1, 2)).sum()
+
+
class Keypoint3DLoss(nn.Module):

    def __init__(self, loss_type: str = 'l1'):
        """
        3D keypoint loss module.
        Args:
            loss_type (str): Choose between l1 and l2 losses.
        """
        super().__init__()
        loss_classes = {'l1': nn.L1Loss, 'l2': nn.MSELoss}
        if loss_type not in loss_classes:
            raise NotImplementedError('Unsupported loss function')
        # Elementwise loss; confidence weighting and reduction happen in forward.
        self.loss_fn = loss_classes[loss_type](reduction='none')

    def forward(self, pred_keypoints_3d: torch.Tensor, gt_keypoints_3d: torch.Tensor, pelvis_id: int = 0):
        """
        Compute 3D keypoint loss.
        Args:
            pred_keypoints_3d (torch.Tensor): Tensor of shape [B, S, N, 3] containing the predicted 3D keypoints (B: batch_size, S: num_samples, N: num_keypoints)
            gt_keypoints_3d (torch.Tensor): Tensor of shape [B, S, N, 4] containing the ground truth 3D keypoints and confidence.
        Returns:
            torch.Tensor: 3D keypoint loss.
        """
        gt_keypoints_3d = gt_keypoints_3d.clone()
        # Root-align both predictions and ground truth at the pelvis joint so
        # the loss is translation-invariant.
        pred_keypoints_3d = pred_keypoints_3d - pred_keypoints_3d[:, pelvis_id, :].unsqueeze(dim=1)
        gt_keypoints_3d[:, :, :-1] = gt_keypoints_3d[:, :, :-1] - gt_keypoints_3d[:, pelvis_id, :-1].unsqueeze(dim=1)
        # Last channel of the ground truth is a per-keypoint confidence weight.
        conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone()
        gt_xyz = gt_keypoints_3d[:, :, :-1]
        return (conf * self.loss_fn(pred_keypoints_3d, gt_xyz)).sum(dim=(1, 2)).sum()
+
class ParameterLoss(nn.Module):

    def __init__(self):
        """
        MANO parameter loss module.
        """
        super().__init__()
        # Elementwise squared error; masking and reduction happen in forward.
        self.loss_fn = nn.MSELoss(reduction='none')

    def forward(self, pred_param: torch.Tensor, gt_param: torch.Tensor, has_param: torch.Tensor):
        """
        Compute MANO parameter loss.
        Args:
            pred_param (torch.Tensor): Tensor of shape [B, S, ...] containing the predicted parameters (body pose / global orientation / betas)
            gt_param (torch.Tensor): Tensor of shape [B, S, ...] containing the ground truth MANO parameters.
            has_param (torch.Tensor): Per-sample availability mask of shape [B].
        Returns:
            torch.Tensor: L2 parameter loss.
        """
        batch_size = pred_param.shape[0]
        # Reshape the per-sample mask so it broadcasts over all parameter dims.
        mask_shape = [batch_size] + [1] * (pred_param.dim() - 1)
        mask = has_param.type(pred_param.type()).view(*mask_shape)
        return (mask * self.loss_fn(pred_param, gt_param)).sum()
diff --git a/WiLoR/wilor/models/mano_wrapper.py b/WiLoR/wilor/models/mano_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c58e372ddbb4b28b2a22d528039b4918ed7e9e9
--- /dev/null
+++ b/WiLoR/wilor/models/mano_wrapper.py
@@ -0,0 +1,40 @@
+import torch
+import numpy as np
+import pickle
+from typing import Optional
+import smplx
+from smplx.lbs import vertices2joints
+from smplx.utils import MANOOutput, to_tensor
+from smplx.vertex_ids import vertex_ids
+
+
class MANO(smplx.MANOLayer):
    def __init__(self, *args, joint_regressor_extra: Optional[str] = None, **kwargs):
        """
        Extension of the official MANO implementation to support more joints.
        Args:
            Same as MANOLayer.
            joint_regressor_extra (str): Path to extra joint regressor.
        """
        super(MANO, self).__init__(*args, **kwargs)
        # Permutation mapping MANO's native joint order (plus the fingertip
        # vertices appended in forward) to the OpenPose hand-keypoint order.
        mano_to_openpose = [0, 13, 14, 15, 16, 1, 2, 3, 17, 4, 5, 6, 18, 10, 11, 12, 19, 7, 8, 9, 20]

        #2, 3, 5, 4, 1
        if joint_regressor_extra is not None:
            # NOTE(review): pickle.load executes code from the file; only load
            # regressors from trusted sources.
            self.register_buffer('joint_regressor_extra', torch.tensor(pickle.load(open(joint_regressor_extra, 'rb'), encoding='latin1'), dtype=torch.float32))
        # Mesh vertex indices used as fingertip keypoints.
        self.register_buffer('extra_joints_idxs', to_tensor(list(vertex_ids['mano'].values()), dtype=torch.long))
        self.register_buffer('joint_map', torch.tensor(mano_to_openpose, dtype=torch.long))

    def forward(self, *args, **kwargs) -> MANOOutput:
        """
        Run forward pass. Same as MANO and also append an extra set of joints if joint_regressor_extra is specified.
        """
        mano_output = super(MANO, self).forward(*args, **kwargs)
        # Append fingertip vertices to the 16 MANO joints, then reorder the
        # resulting 21 keypoints with joint_map.
        extra_joints = torch.index_select(mano_output.vertices, 1, self.extra_joints_idxs)
        joints = torch.cat([mano_output.joints, extra_joints], dim=1)
        joints = joints[:, self.joint_map, :]
        if hasattr(self, 'joint_regressor_extra'):
            # Optional extra joints regressed directly from the mesh vertices.
            extra_joints = vertices2joints(self.joint_regressor_extra, mano_output.vertices)
            joints = torch.cat([joints, extra_joints], dim=1)
        mano_output.joints = joints
        return mano_output
diff --git a/WiLoR/wilor/models/wilor.py b/WiLoR/wilor/models/wilor.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd7d7e86428a398ceae7ec7c92e59cc5366f1096
--- /dev/null
+++ b/WiLoR/wilor/models/wilor.py
@@ -0,0 +1,376 @@
+import torch
+import pytorch_lightning as pl
+from typing import Any, Dict, Mapping, Tuple
+
+from yacs.config import CfgNode
+
+from ..utils import SkeletonRenderer, MeshRenderer
+from ..utils.geometry import aa_to_rotmat, perspective_projection
+from ..utils.pylogger import get_pylogger
+from .backbones import create_backbone
+from .heads import RefineNet
+from .discriminator import Discriminator
+from .losses import Keypoint3DLoss, Keypoint2DLoss, ParameterLoss
+from . import MANO
+
+log = get_pylogger(__name__)
+
+class WiLoR(pl.LightningModule):
+
    def __init__(self, cfg: CfgNode, init_renderer: bool = True):
        """
        Setup WiLoR model
        Args:
            cfg (CfgNode): Config file as a yacs CfgNode
            init_renderer (bool): Whether to build the visualization renderers.
        """
        super().__init__()

        # Save hyperparameters
        self.save_hyperparameters(logger=False, ignore=['init_renderer'])

        self.cfg = cfg
        # Create backbone feature extractor
        self.backbone = create_backbone(cfg)
        if cfg.MODEL.BACKBONE.get('PRETRAINED_WEIGHTS', None):
            log.info(f'Loading backbone weights from {cfg.MODEL.BACKBONE.PRETRAINED_WEIGHTS}')
            # strict=False: the checkpoint may not cover the MANO-token heads
            # added on top of the plain ViT.
            self.backbone.load_state_dict(torch.load(cfg.MODEL.BACKBONE.PRETRAINED_WEIGHTS, map_location='cpu')['state_dict'], strict = False)

        # Create RefineNet head
        self.refine_net = RefineNet(cfg, feat_dim=1280, upscale=3)

        # Create discriminator (only when adversarial training is enabled)
        if self.cfg.LOSS_WEIGHTS.ADVERSARIAL > 0:
            self.discriminator = Discriminator()

        # Define loss functions
        self.keypoint_3d_loss = Keypoint3DLoss(loss_type='l1')
        self.keypoint_2d_loss = Keypoint2DLoss(loss_type='l1')
        self.mano_parameter_loss = ParameterLoss()

        # Instantiate MANO model
        mano_cfg = {k.lower(): v for k,v in dict(cfg.MANO).items()}
        self.mano = MANO(**mano_cfg)

        # Buffer that shows whether we need to initialize ActNorm layers
        self.register_buffer('initialized', torch.tensor(False))
        # Setup renderer for visualization
        if init_renderer:
            self.renderer = SkeletonRenderer(self.cfg)
            self.mesh_renderer = MeshRenderer(self.cfg, faces=self.mano.faces)
        else:
            self.renderer = None
            self.mesh_renderer = None

        # Disable automatic optimization since we use adversarial training
        self.automatic_optimization = False
+
+ def on_after_backward(self):
+ for name, param in self.named_parameters():
+ if param.grad is None:
+ print(param.shape)
+ print(name)
+
+
+ def get_parameters(self):
+ #all_params = list(self.mano_head.parameters())
+ all_params = list(self.backbone.parameters())
+ return all_params
+
    def configure_optimizers(self) -> Tuple[torch.optim.Optimizer, torch.optim.Optimizer]:
        """
        Setup model and discriminator Optimizers
        Returns:
            Tuple[torch.optim.Optimizer, torch.optim.Optimizer]: Model and discriminator optimizers
        """
        # Only parameters that still require grad (frozen stages are excluded).
        param_groups = [{'params': filter(lambda p: p.requires_grad, self.get_parameters()), 'lr': self.cfg.TRAIN.LR}]

        optimizer = torch.optim.AdamW(params=param_groups,
                                      # lr=self.cfg.TRAIN.LR,
                                      weight_decay=self.cfg.TRAIN.WEIGHT_DECAY)
        # NOTE(review): self.discriminator only exists when
        # cfg.LOSS_WEIGHTS.ADVERSARIAL > 0 (see __init__); this line raises
        # AttributeError otherwise -- confirm adversarial training is always on.
        optimizer_disc = torch.optim.AdamW(params=self.discriminator.parameters(),
                                           lr=self.cfg.TRAIN.LR,
                                           weight_decay=self.cfg.TRAIN.WEIGHT_DECAY)

        return optimizer, optimizer_disc
+
    def forward_step(self, batch: Dict, train: bool = False) -> Dict:
        """
        Run a forward step of the network
        Args:
            batch (Dict): Dictionary containing batch data
            train (bool): Flag indicating whether it is training or validation mode
        Returns:
            Dict: Dictionary containing the regression output
        """
        # Use RGB image as input
        x = batch['img']
        batch_size = x.shape[0]
        # Compute conditioning features using the backbone
        # if using ViT backbone, we need to use a different aspect ratio:
        # crop 32 px from each side of the width (assumes a square crop, e.g.
        # 256x256 -> 256x192 -- TODO confirm against the data pipeline).
        temp_mano_params, pred_cam, pred_mano_feats, vit_out = self.backbone(x[:,:,:,32:-32]) # B, 1280, 16, 12

        # Compute camera translation
        device = temp_mano_params['hand_pose'].device
        dtype = temp_mano_params['hand_pose'].dtype
        focal_length = self.cfg.EXTRA.FOCAL_LENGTH * torch.ones(batch_size, 2, device=device, dtype=dtype)

        ## Temp MANO: run the coarse backbone prediction through MANO to get a
        ## preliminary mesh whose vertices condition the RefineNet.
        temp_mano_params['global_orient'] = temp_mano_params['global_orient'].reshape(batch_size, -1, 3, 3)
        temp_mano_params['hand_pose'] = temp_mano_params['hand_pose'].reshape(batch_size, -1, 3, 3)
        temp_mano_params['betas'] = temp_mano_params['betas'].reshape(batch_size, -1)
        temp_mano_output = self.mano(**{k: v.float() for k,v in temp_mano_params.items()}, pose2rot=False)
        #temp_keypoints_3d = temp_mano_output.joints
        temp_vertices = temp_mano_output.vertices

        pred_mano_params, pred_cam = self.refine_net(vit_out, temp_vertices, pred_cam, pred_mano_feats, focal_length)
        # Store useful regression outputs to the output dict

        output = {}
        output['pred_cam'] = pred_cam
        output['pred_mano_params'] = {k: v.clone() for k,v in pred_mano_params.items()}

        # Convert weak-perspective camera (s, tx, ty) into a 3D translation;
        # 1e-9 guards against division by zero for degenerate scale s.
        pred_cam_t = torch.stack([pred_cam[:, 1],
                                  pred_cam[:, 2],
                                  2*focal_length[:, 0]/(self.cfg.MODEL.IMAGE_SIZE * pred_cam[:, 0] +1e-9)],dim=-1)
        output['pred_cam_t'] = pred_cam_t
        output['focal_length'] = focal_length

        # Compute model vertices, joints and the projected joints
        pred_mano_params['global_orient'] = pred_mano_params['global_orient'].reshape(batch_size, -1, 3, 3)
        pred_mano_params['hand_pose'] = pred_mano_params['hand_pose'].reshape(batch_size, -1, 3, 3)
        pred_mano_params['betas'] = pred_mano_params['betas'].reshape(batch_size, -1)
        mano_output = self.mano(**{k: v.float() for k,v in pred_mano_params.items()}, pose2rot=False)
        pred_keypoints_3d = mano_output.joints
        pred_vertices = mano_output.vertices

        output['pred_keypoints_3d'] = pred_keypoints_3d.reshape(batch_size, -1, 3)
        output['pred_vertices'] = pred_vertices.reshape(batch_size, -1, 3)
        pred_cam_t = pred_cam_t.reshape(-1, 3)
        focal_length = focal_length.reshape(-1, 2)

        # Focal length is divided by the crop size so the projected keypoints
        # come out in normalized crop coordinates.
        pred_keypoints_2d = perspective_projection(pred_keypoints_3d,
                                                   translation=pred_cam_t,
                                                   focal_length=focal_length / self.cfg.MODEL.IMAGE_SIZE)
        output['pred_keypoints_2d'] = pred_keypoints_2d.reshape(batch_size, -1, 2)

        return output
+
    def compute_loss(self, batch: Dict, output: Dict, train: bool = True) -> torch.Tensor:
        """
        Compute losses given the input batch and the regression output
        Args:
            batch (Dict): Dictionary containing batch data
            output (Dict): Dictionary containing the regression output
            train (bool): Flag indicating whether it is training or validation mode
        Returns:
            torch.Tensor : Total loss for current batch
        """

        pred_mano_params = output['pred_mano_params']
        pred_keypoints_2d = output['pred_keypoints_2d']
        pred_keypoints_3d = output['pred_keypoints_3d']

        batch_size = pred_mano_params['hand_pose'].shape[0]
        device = pred_mano_params['hand_pose'].device
        dtype = pred_mano_params['hand_pose'].dtype

        # Get annotations
        gt_keypoints_2d = batch['keypoints_2d']
        gt_keypoints_3d = batch['keypoints_3d']
        gt_mano_params = batch['mano_params']
        has_mano_params = batch['has_mano_params']
        is_axis_angle = batch['mano_params_is_axis_angle']

        # Compute 2D and 3D keypoint losses
        loss_keypoints_2d = self.keypoint_2d_loss(pred_keypoints_2d, gt_keypoints_2d)
        loss_keypoints_3d = self.keypoint_3d_loss(pred_keypoints_3d, gt_keypoints_3d, pelvis_id=0)

        # Compute loss on MANO parameters
        loss_mano_params = {}
        for k, pred in pred_mano_params.items():
            gt = gt_mano_params[k].view(batch_size, -1)
            if is_axis_angle[k].all():
                # Convert axis-angle ground truth to rotation matrices so it
                # matches the predicted representation.
                gt = aa_to_rotmat(gt.reshape(-1, 3)).view(batch_size, -1, 3, 3)
            has_gt = has_mano_params[k]
            loss_mano_params[k] = self.mano_parameter_loss(pred.reshape(batch_size, -1), gt.reshape(batch_size, -1), has_gt)

        # Weighted sum of all terms; weights come from the config.
        loss = self.cfg.LOSS_WEIGHTS['KEYPOINTS_3D'] * loss_keypoints_3d+\
               self.cfg.LOSS_WEIGHTS['KEYPOINTS_2D'] * loss_keypoints_2d+\
               sum([loss_mano_params[k] * self.cfg.LOSS_WEIGHTS[k.upper()] for k in loss_mano_params])

        # Detached copies kept for logging only.
        losses = dict(loss=loss.detach(),
                      loss_keypoints_2d=loss_keypoints_2d.detach(),
                      loss_keypoints_3d=loss_keypoints_3d.detach())

        for k, v in loss_mano_params.items():
            losses['loss_' + k] = v.detach()

        output['losses'] = losses

        return loss
+
    # Tensorboard logging should run from first rank only
    @pl.utilities.rank_zero.rank_zero_only
    def tensorboard_logging(self, batch: Dict, output: Dict, step_count: int, train: bool = True, write_to_summary_writer: bool = True) -> torch.Tensor:
        """
        Log results to Tensorboard
        Args:
            batch (Dict): Dictionary containing batch data
            output (Dict): Dictionary containing the regression output
            step_count (int): Global training step count
            train (bool): Flag indicating whether it is training or validation mode
            write_to_summary_writer (bool): If False, only build and return the
                prediction image grid without writing scalars/images to Tensorboard
        Returns:
            The rendered prediction grid produced by the mesh renderer
        """

        mode = 'train' if train else 'val'
        batch_size = batch['keypoints_2d'].shape[0]
        images = batch['img']
        # Undo the input normalization: multiply by the per-channel std, add the
        # per-channel mean (ImageNet-style constants — presumably matching the
        # dataset transforms; confirm against the dataloader).
        images = images * torch.tensor([0.229, 0.224, 0.225], device=images.device).reshape(1,3,1,1)
        images = images + torch.tensor([0.485, 0.456, 0.406], device=images.device).reshape(1,3,1,1)
        #images = 255*images.permute(0, 2, 3, 1).cpu().numpy()

        pred_keypoints_3d = output['pred_keypoints_3d'].detach().reshape(batch_size, -1, 3)
        pred_vertices = output['pred_vertices'].detach().reshape(batch_size, -1, 3)
        focal_length = output['focal_length'].detach().reshape(batch_size, 2)
        gt_keypoints_3d = batch['keypoints_3d']
        gt_keypoints_2d = batch['keypoints_2d']

        losses = output['losses']
        pred_cam_t = output['pred_cam_t'].detach().reshape(batch_size, 3)
        pred_keypoints_2d = output['pred_keypoints_2d'].detach().reshape(batch_size, -1, 2)
        if write_to_summary_writer:
            # Log every (already detached) loss term as a scalar.
            summary_writer = self.logger.experiment
            for loss_name, val in losses.items():
                summary_writer.add_scalar(mode +'/' + loss_name, val.detach().item(), step_count)
        # Only render a capped number of images to keep logging cheap.
        num_images = min(batch_size, self.cfg.EXTRA.NUM_LOG_IMAGES)

        gt_keypoints_3d = batch['keypoints_3d']
        pred_keypoints_3d = output['pred_keypoints_3d'].detach().reshape(batch_size, -1, 3)

        # We render the skeletons instead of the full mesh because rendering a lot of meshes will make the training slow.
        #predictions = self.renderer(pred_keypoints_3d[:num_images],
        #                            gt_keypoints_3d[:num_images],
        #                            2 * gt_keypoints_2d[:num_images],
        #                            images=images[:num_images],
        #                            camera_translation=pred_cam_t[:num_images])
        predictions = self.mesh_renderer.visualize_tensorboard(pred_vertices[:num_images].cpu().numpy(),
                                                               pred_cam_t[:num_images].cpu().numpy(),
                                                               images[:num_images].cpu().numpy(),
                                                               pred_keypoints_2d[:num_images].cpu().numpy(),
                                                               gt_keypoints_2d[:num_images].cpu().numpy(),
                                                               focal_length=focal_length[:num_images].cpu().numpy())
        if write_to_summary_writer:
            summary_writer.add_image('%s/predictions' % mode, predictions, step_count)

        return predictions
+
    def forward(self, batch: Dict) -> Dict:
        """
        Run a forward step of the network in val mode
        Args:
            batch (Dict): Dictionary containing batch data
        Returns:
            Dict: Dictionary containing the regression output
        """
        # Thin inference entry point: delegates to forward_step with train=False.
        return self.forward_step(batch, train=False)
+
    def training_step_discriminator(self, batch: Dict,
                                    hand_pose: torch.Tensor,
                                    betas: torch.Tensor,
                                    optimizer: torch.optim.Optimizer) -> torch.Tensor:
        """
        Run a discriminator training step
        Args:
            batch (Dict): Dictionary containing mocap batch data
            hand_pose (torch.Tensor): Regressed hand pose from current step
            betas (torch.Tensor): Regressed betas from current step
            optimizer (torch.optim.Optimizer): Discriminator optimizer
        Returns:
            torch.Tensor: Discriminator loss (unweighted, detached — for logging)
        """
        batch_size = hand_pose.shape[0]
        gt_hand_pose = batch['hand_pose']
        gt_betas = batch['betas']
        # Mocap poses are stored as axis-angle; convert to rotation matrices so
        # they match the representation fed to the discriminator.
        gt_rotmat = aa_to_rotmat(gt_hand_pose.view(-1,3)).view(batch_size, -1, 3, 3)
        # Least-squares-style targets: predictions (detached so no generator
        # gradients flow here) are pushed towards 0, mocap samples towards 1.
        disc_fake_out = self.discriminator(hand_pose.detach(), betas.detach())
        loss_fake = ((disc_fake_out - 0.0) ** 2).sum() / batch_size
        disc_real_out = self.discriminator(gt_rotmat, gt_betas)
        loss_real = ((disc_real_out - 1.0) ** 2).sum() / batch_size
        loss_disc = loss_fake + loss_real
        loss = self.cfg.LOSS_WEIGHTS.ADVERSARIAL * loss_disc
        # Manual optimization: this module steps its optimizers explicitly.
        optimizer.zero_grad()
        self.manual_backward(loss)
        optimizer.step()
        return loss_disc.detach()
+
    def training_step(self, joint_batch: Dict, batch_idx: int) -> Dict:
        """
        Run a full training step (manual optimization: backward/step are called explicitly).
        Args:
            joint_batch (Dict): Dictionary containing image ('img') and mocap ('mocap') batch data
            batch_idx (int): Unused.
        Returns:
            Dict: Dictionary containing regression output.
        """
        batch = joint_batch['img']
        mocap_batch = joint_batch['mocap']
        optimizer = self.optimizers(use_pl_optimizer=True)
        # With an adversarial term enabled, two optimizers are configured:
        # (generator, discriminator).
        if self.cfg.LOSS_WEIGHTS.ADVERSARIAL > 0:
            optimizer, optimizer_disc = optimizer

        batch_size = batch['img'].shape[0]
        output = self.forward_step(batch, train=True)
        pred_mano_params = output['pred_mano_params']
        # Optional hook: refresh pseudo ground truth from the current predictions.
        if self.cfg.get('UPDATE_GT_SPIN', False):
            self.update_batch_gt_spin(batch, output)
        loss = self.compute_loss(batch, output, train=True)
        # Generator-side adversarial loss: push discriminator output towards 1 ("real").
        if self.cfg.LOSS_WEIGHTS.ADVERSARIAL > 0:
            disc_out = self.discriminator(pred_mano_params['hand_pose'].reshape(batch_size, -1), pred_mano_params['betas'].reshape(batch_size, -1))
            loss_adv = ((disc_out - 1.0) ** 2).sum() / batch_size
            loss = loss + self.cfg.LOSS_WEIGHTS.ADVERSARIAL * loss_adv

        # Error if Nan
        if torch.isnan(loss):
            raise ValueError('Loss is NaN')

        optimizer.zero_grad()
        self.manual_backward(loss)
        # Clip gradient
        if self.cfg.TRAIN.get('GRAD_CLIP_VAL', 0) > 0:
            gn = torch.nn.utils.clip_grad_norm_(self.get_parameters(), self.cfg.TRAIN.GRAD_CLIP_VAL, error_if_nonfinite=True)
            self.log('train/grad_norm', gn, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        optimizer.step()
        # Discriminator update on the mocap batch; loss_adv is guaranteed to
        # exist here because this branch shares the ADVERSARIAL > 0 guard above.
        if self.cfg.LOSS_WEIGHTS.ADVERSARIAL > 0:
            loss_disc = self.training_step_discriminator(mocap_batch, pred_mano_params['hand_pose'].reshape(batch_size, -1), pred_mano_params['betas'].reshape(batch_size, -1), optimizer_disc)
            output['losses']['loss_gen'] = loss_adv
            output['losses']['loss_disc'] = loss_disc

        # Periodically dump loss scalars and rendered predictions to Tensorboard.
        if self.global_step > 0 and self.global_step % self.cfg.GENERAL.LOG_STEPS == 0:
            self.tensorboard_logging(batch, output, self.global_step, train=True)

        self.log('train/loss', output['losses']['loss'], on_step=True, on_epoch=True, prog_bar=True, logger=False)

        return output
+
+ def validation_step(self, batch: Dict, batch_idx: int, dataloader_idx=0) -> Dict:
+ """
+ Run a validation step and log to Tensorboard
+ Args:
+ batch (Dict): Dictionary containing batch data
+ batch_idx (int): Unused.
+ Returns:
+ Dict: Dictionary containing regression output.
+ """
+ # batch_size = batch['img'].shape[0]
+ output = self.forward_step(batch, train=False)
+ loss = self.compute_loss(batch, output, train=False)
+ output['loss'] = loss
+ self.tensorboard_logging(batch, output, self.global_step, train=False)
+
+ return output
diff --git a/WiLoR/wilor/utils/__init__.py b/WiLoR/wilor/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6425e21f9c98bfdb42b57b7cbe82a3bfa3b753a1
--- /dev/null
+++ b/WiLoR/wilor/utils/__init__.py
@@ -0,0 +1,25 @@
+import torch
+from typing import Any
+
+from .renderer import Renderer
+from .mesh_renderer import MeshRenderer
+from .skeleton_renderer import SkeletonRenderer
+from .pose_utils import eval_pose, Evaluator
+
def recursive_to(x: Any, target: torch.device):
    """
    Recursively transfer a batch of data to the target device.

    Handles arbitrarily nested dicts, lists and tuples of tensors; any other
    value is returned unchanged. (Tuples used to fall through untouched, which
    left their tensors on the original device.)
    Args:
        x (Any): Batch of data.
        target (torch.device): Target device.
    Returns:
        Batch of data where all tensors are transfered to the target device.
    """
    if isinstance(x, dict):
        return {k: recursive_to(v, target) for k, v in x.items()}
    elif isinstance(x, torch.Tensor):
        return x.to(target)
    elif isinstance(x, list):
        return [recursive_to(i, target) for i in x]
    elif isinstance(x, tuple):
        # Plain tuple rebuild; container type is preserved for the common case.
        return tuple(recursive_to(i, target) for i in x)
    else:
        return x
diff --git a/WiLoR/wilor/utils/__pycache__/__init__.cpython-311.pyc b/WiLoR/wilor/utils/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..04d415d641460f18122f4cd50d39ebcc70414f55
Binary files /dev/null and b/WiLoR/wilor/utils/__pycache__/__init__.cpython-311.pyc differ
diff --git a/WiLoR/wilor/utils/__pycache__/geometry.cpython-311.pyc b/WiLoR/wilor/utils/__pycache__/geometry.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d46bc4ed0aaafb879dc82b65e95fc9bcae65e07b
Binary files /dev/null and b/WiLoR/wilor/utils/__pycache__/geometry.cpython-311.pyc differ
diff --git a/WiLoR/wilor/utils/__pycache__/mesh_renderer.cpython-311.pyc b/WiLoR/wilor/utils/__pycache__/mesh_renderer.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..13c574441a533a21b274d8fdbaac6be7b35d7d1c
Binary files /dev/null and b/WiLoR/wilor/utils/__pycache__/mesh_renderer.cpython-311.pyc differ
diff --git a/WiLoR/wilor/utils/__pycache__/pose_utils.cpython-311.pyc b/WiLoR/wilor/utils/__pycache__/pose_utils.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a916e5452f895afb53faa05b7f4c66730fc0f9d3
Binary files /dev/null and b/WiLoR/wilor/utils/__pycache__/pose_utils.cpython-311.pyc differ
diff --git a/WiLoR/wilor/utils/__pycache__/pylogger.cpython-311.pyc b/WiLoR/wilor/utils/__pycache__/pylogger.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9addb7163d633d94dc76c4f39e49facf1057abf0
Binary files /dev/null and b/WiLoR/wilor/utils/__pycache__/pylogger.cpython-311.pyc differ
diff --git a/WiLoR/wilor/utils/__pycache__/render_openpose.cpython-311.pyc b/WiLoR/wilor/utils/__pycache__/render_openpose.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cb40e4511530074e3402080c7f922f7a99c81b70
Binary files /dev/null and b/WiLoR/wilor/utils/__pycache__/render_openpose.cpython-311.pyc differ
diff --git a/WiLoR/wilor/utils/__pycache__/renderer.cpython-311.pyc b/WiLoR/wilor/utils/__pycache__/renderer.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e668a2d335c9d77a355fcb8c6fc1cce524d3411f
Binary files /dev/null and b/WiLoR/wilor/utils/__pycache__/renderer.cpython-311.pyc differ
diff --git a/WiLoR/wilor/utils/__pycache__/skeleton_renderer.cpython-311.pyc b/WiLoR/wilor/utils/__pycache__/skeleton_renderer.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c48ee07bb26791bea8959e9e31bcadac3e5b743a
Binary files /dev/null and b/WiLoR/wilor/utils/__pycache__/skeleton_renderer.cpython-311.pyc differ
diff --git a/WiLoR/wilor/utils/geometry.py b/WiLoR/wilor/utils/geometry.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad61f487782a34396332a3251053b5c2affbff4a
--- /dev/null
+++ b/WiLoR/wilor/utils/geometry.py
@@ -0,0 +1,102 @@
+from typing import Optional
+import torch
+from torch.nn import functional as F
+
def aa_to_rotmat(theta: torch.Tensor):
    """
    Convert axis-angle representation to rotation matrix.
    The vector is first mapped to the unit quaternion
    (w, x, y, z) = (cos(a/2), sin(a/2) * axis), which is then expanded into
    the corresponding rotation matrix.
    Args:
        theta (torch.Tensor): Tensor of shape (B, 3) containing axis-angle representations.
    Returns:
        torch.Tensor: Corresponding rotation matrices with shape (B, 3, 3).
    """
    # Small offset keeps the norm strictly positive for the zero rotation.
    angle = torch.norm(theta + 1e-8, p=2, dim=1, keepdim=True)
    axis = theta / angle
    half = angle * 0.5
    quat = torch.cat([torch.cos(half), torch.sin(half) * axis], dim=1)
    # Re-normalize to guard against rounding drift before the expansion.
    quat = quat / quat.norm(p=2, dim=1, keepdim=True)
    w, x, y, z = quat.unbind(dim=1)
    w2, x2, y2, z2 = w * w, x * x, y * y, z * z
    return torch.stack([
        w2 + x2 - y2 - z2, 2 * (x * y - w * z), 2 * (w * y + x * z),
        2 * (w * z + x * y), w2 - x2 + y2 - z2, 2 * (y * z - w * x),
        2 * (x * z - w * y), 2 * (w * x + y * z), w2 - x2 - y2 + z2,
    ], dim=1).view(-1, 3, 3)
+
def quat_to_rotmat(quat: torch.Tensor) -> torch.Tensor:
    """
    Convert quaternion representation to rotation matrix.
    Args:
        quat (torch.Tensor) of shape (B, 4); 4 <===> (w, x, y, z).
    Returns:
        torch.Tensor: Corresponding rotation matrices with shape (B, 3, 3).
    """
    # Normalize so the formula below maps exactly onto SO(3).
    q = quat / quat.norm(p=2, dim=1, keepdim=True)
    w, x, y, z = q.unbind(dim=1)

    w2, x2, y2, z2 = w * w, x * x, y * y, z * z
    entries = [
        w2 + x2 - y2 - z2, 2 * (x * y - w * z), 2 * (w * y + x * z),
        2 * (w * z + x * y), w2 - x2 + y2 - z2, 2 * (y * z - w * x),
        2 * (x * z - w * y), 2 * (w * x + y * z), w2 - x2 - y2 + z2,
    ]
    return torch.stack(entries, dim=1).view(q.shape[0], 3, 3)
+
+
def rot6d_to_rotmat(x: torch.Tensor) -> torch.Tensor:
    """
    Convert 6D rotation representation to 3x3 rotation matrix.
    Based on Zhou et al., "On the Continuity of Rotation Representations in Neural Networks", CVPR 2019
    Args:
        x (torch.Tensor): (B,6) Batch of 6-D rotation representations.
    Returns:
        torch.Tensor: Batch of corresponding rotation matrices with shape (B,3,3).
    """
    x = x.reshape(-1,2,3).permute(0, 2, 1).contiguous()
    a1 = x[:, :, 0]
    a2 = x[:, :, 1]
    # Gram-Schmidt: b1 = a1 normalized, b2 = component of a2 orthogonal to b1.
    b1 = F.normalize(a1, dim=1)
    b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1, dim=1)
    # dim=1 is required: without it torch.cross picks the *first* dimension of
    # size 3, which silently computes the wrong cross product when B == 3
    # (and implicit-dim cross is deprecated).
    b3 = torch.cross(b1, b2, dim=1)
    return torch.stack((b1, b2, b3), dim=-1)
+
def perspective_projection(points: torch.Tensor,
                           translation: torch.Tensor,
                           focal_length: torch.Tensor,
                           camera_center: Optional[torch.Tensor] = None,
                           rotation: Optional[torch.Tensor] = None) -> torch.Tensor:
    """
    Project 3D points onto the image plane with a pinhole camera model.
    Args:
        points (torch.Tensor): Tensor of shape (B, N, 3) containing the input 3D points.
        translation (torch.Tensor): Tensor of shape (B, 3) containing the 3D camera translation.
        focal_length (torch.Tensor): Tensor of shape (B, 2) containing the focal length in pixels.
        camera_center (torch.Tensor): Tensor of shape (B, 2) containing the camera center in pixels.
        rotation (torch.Tensor): Tensor of shape (B, 3, 3) containing the camera rotation.
    Returns:
        torch.Tensor: Tensor of shape (B, N, 2) containing the projection of the input points.
    """
    batch = points.shape[0]
    # Default to an identity rotation and a principal point at the origin.
    if rotation is None:
        rotation = torch.eye(3, device=points.device, dtype=points.dtype).unsqueeze(0).expand(batch, -1, -1)
    if camera_center is None:
        camera_center = points.new_zeros(batch, 2)

    # Intrinsics K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].
    K = points.new_zeros(batch, 3, 3)
    K[:, 0, 0] = focal_length[:, 0]
    K[:, 1, 1] = focal_length[:, 1]
    K[:, 2, 2] = 1.0
    K[:, :-1, -1] = camera_center

    # Rigid transform into the camera frame: p' = R p + t.
    cam_points = points @ rotation.transpose(1, 2) + translation.unsqueeze(1)

    # Perspective divide by depth, then map through the intrinsics; drop the
    # homogeneous coordinate to return pixel (x, y).
    homogeneous = cam_points / cam_points[:, :, -1].unsqueeze(-1)
    pixels = homogeneous @ K.transpose(1, 2)
    return pixels[:, :, :-1]
\ No newline at end of file
diff --git a/WiLoR/wilor/utils/mesh_renderer.py b/WiLoR/wilor/utils/mesh_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4f301f6aa910f46fad563f4da29f0727c890119
--- /dev/null
+++ b/WiLoR/wilor/utils/mesh_renderer.py
@@ -0,0 +1,149 @@
+import os
+if 'PYOPENGL_PLATFORM' not in os.environ:
+ os.environ['PYOPENGL_PLATFORM'] = 'egl'
+import torch
+from torchvision.utils import make_grid
+import numpy as np
+import pyrender
+import trimesh
+import cv2
+import torch.nn.functional as F
+
+from .render_openpose import render_openpose
+
def create_raymond_lights():
    """
    Build three directional lights arranged around the scene (the classic
    three-point "Raymond" rig used by pyrender examples).
    Returns:
        list of pyrender.Node, each holding a DirectionalLight with its pose.
    """
    # Redundant local `import pyrender` removed: the module imports it at the top.
    thetas = np.pi * np.array([1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0])
    phis = np.pi * np.array([0.0, 2.0 / 3.0, 4.0 / 3.0])

    nodes = []

    for phi, theta in zip(phis, thetas):
        # Light direction from spherical angles (becomes the node's z axis).
        xp = np.sin(theta) * np.cos(phi)
        yp = np.sin(theta) * np.sin(phi)
        zp = np.cos(theta)

        z = np.array([xp, yp, zp])
        z = z / np.linalg.norm(z)
        # Build an orthonormal frame around z; fall back to the x axis when z
        # is (anti)parallel to the world z axis.
        x = np.array([-z[1], z[0], 0.0])
        if np.linalg.norm(x) == 0:
            x = np.array([1.0, 0.0, 0.0])
        x = x / np.linalg.norm(x)
        y = np.cross(z, x)

        matrix = np.eye(4)
        matrix[:3,:3] = np.c_[x,y,z]
        nodes.append(pyrender.Node(
            light=pyrender.DirectionalLight(color=np.ones(3), intensity=1.0),
            matrix=matrix
        ))

    return nodes
+
class MeshRenderer:
    """
    Offscreen pyrender-based visualizer for Tensorboard logging: renders the
    predicted mesh (front and rotated side view) over the input images and
    draws predicted / ground-truth 2D keypoints.
    """

    def __init__(self, cfg, faces=None):
        # cfg: config node; reads EXTRA.FOCAL_LENGTH and MODEL.IMAGE_SIZE.
        # faces: (F, 3) triangle index array shared by every rendered mesh.
        self.cfg = cfg
        self.focal_length = cfg.EXTRA.FOCAL_LENGTH
        self.img_res = cfg.MODEL.IMAGE_SIZE
        # NOTE(review): this renderer instance is never used below — __call__
        # creates a fresh OffscreenRenderer per invocation; confirm it is needed.
        self.renderer = pyrender.OffscreenRenderer(viewport_width=self.img_res,
                                                   viewport_height=self.img_res,
                                                   point_size=1.0)

        self.camera_center = [self.img_res // 2, self.img_res // 2]
        self.faces = faces

    def visualize(self, vertices, camera_translation, images, focal_length=None, nrow=3, padding=2):
        # Build a grid of (input image, front render, side render) per sample.
        # images: (B, 3, H, W) in [0, 1]; vertices: (B, V, 3); camera_translation: (B, 3).
        images_np = np.transpose(images, (0,2,3,1))
        rend_imgs = []
        for i in range(vertices.shape[0]):
            # focal_length argument is ignored here; the configured value is used.
            fl = self.focal_length
            rend_img = torch.from_numpy(np.transpose(self.__call__(vertices[i], camera_translation[i], images_np[i], focal_length=fl, side_view=False), (2,0,1))).float()
            rend_img_side = torch.from_numpy(np.transpose(self.__call__(vertices[i], camera_translation[i], images_np[i], focal_length=fl, side_view=True), (2,0,1))).float()
            rend_imgs.append(torch.from_numpy(images[i]))
            rend_imgs.append(rend_img)
            rend_imgs.append(rend_img_side)
        rend_imgs = make_grid(rend_imgs, nrow=nrow, padding=padding)
        return rend_imgs

    def visualize_tensorboard(self, vertices, camera_translation, images, pred_keypoints, gt_keypoints, focal_length=None, nrow=5, padding=2):
        # Like visualize(), plus OpenPose-style keypoint overlays per sample.
        images_np = np.transpose(images, (0,2,3,1))
        rend_imgs = []
        # Append a constant confidence of 1 to the predicted keypoints.
        pred_keypoints = np.concatenate((pred_keypoints, np.ones_like(pred_keypoints)[:, :, [0]]), axis=-1)
        # Keypoints arrive in normalized coords centered at 0; map them to pixels.
        # NOTE(review): gt_keypoints is modified in place here — confirm callers
        # do not reuse the array afterwards.
        pred_keypoints = self.img_res * (pred_keypoints + 0.5)
        gt_keypoints[:, :, :-1] = self.img_res * (gt_keypoints[:, :, :-1] + 0.5)
        #keypoint_matches = [(1, 12), (2, 8), (3, 7), (4, 6), (5, 9), (6, 10), (7, 11), (8, 14), (9, 2), (10, 1), (11, 0), (12, 3), (13, 4), (14, 5)]
        for i in range(vertices.shape[0]):
            fl = self.focal_length
            rend_img = torch.from_numpy(np.transpose(self.__call__(vertices[i], camera_translation[i], images_np[i], focal_length=fl, side_view=False), (2,0,1))).float()
            rend_img_side = torch.from_numpy(np.transpose(self.__call__(vertices[i], camera_translation[i], images_np[i], focal_length=fl, side_view=True), (2,0,1))).float()
            # Only the first 21 keypoints (the hand joints) are drawn.
            hand_keypoints = pred_keypoints[i, :21]
            #extra_keypoints = pred_keypoints[i, -19:]
            #for pair in keypoint_matches:
            #    hand_keypoints[pair[0], :] = extra_keypoints[pair[1], :]
            pred_keypoints_img = render_openpose(255 * images_np[i].copy(), hand_keypoints) / 255
            hand_keypoints = gt_keypoints[i, :21]
            #extra_keypoints = gt_keypoints[i, -19:]
            #for pair in keypoint_matches:
            #    if extra_keypoints[pair[1], -1] > 0 and hand_keypoints[pair[0], -1] == 0:
            #        hand_keypoints[pair[0], :] = extra_keypoints[pair[1], :]
            gt_keypoints_img = render_openpose(255*images_np[i].copy(), hand_keypoints) / 255
            rend_imgs.append(torch.from_numpy(images[i]))
            rend_imgs.append(rend_img)
            rend_imgs.append(rend_img_side)
            rend_imgs.append(torch.from_numpy(pred_keypoints_img).permute(2,0,1))
            rend_imgs.append(torch.from_numpy(gt_keypoints_img).permute(2,0,1))
        rend_imgs = make_grid(rend_imgs, nrow=nrow, padding=padding)
        return rend_imgs

    def __call__(self, vertices, camera_translation, image, focal_length=5000, text=None, resize=None, side_view=False, baseColorFactor=(1.0, 1.0, 0.9, 1.0), rot_angle=90):
        # Render one mesh over `image` (H, W, 3 floats); returns float32 array.
        renderer = pyrender.OffscreenRenderer(viewport_width=image.shape[1],
                                              viewport_height=image.shape[0],
                                              point_size=1.0)
        material = pyrender.MetallicRoughnessMaterial(
            metallicFactor=0.0,
            alphaMode='OPAQUE',
            baseColorFactor=baseColorFactor)

        # NOTE(review): negates x in place on the caller's array; with the double
        # call in visualize*, the second (side-view) call therefore sees the
        # flipped value — confirm this is intended before changing it.
        camera_translation[0] *= -1.

        mesh = trimesh.Trimesh(vertices.copy(), self.faces.copy())
        if side_view:
            # Rotate the mesh about the vertical axis for the side view.
            rot = trimesh.transformations.rotation_matrix(
                np.radians(rot_angle), [0, 1, 0])
            mesh.apply_transform(rot)
        # 180-degree rotation about x flips the y/z axes for the render camera.
        rot = trimesh.transformations.rotation_matrix(
            np.radians(180), [1, 0, 0])
        mesh.apply_transform(rot)
        mesh = pyrender.Mesh.from_trimesh(mesh, material=material)

        scene = pyrender.Scene(bg_color=[0.0, 0.0, 0.0, 0.0],
                               ambient_light=(0.3, 0.3, 0.3))
        scene.add(mesh, 'mesh')

        camera_pose = np.eye(4)
        camera_pose[:3, 3] = camera_translation
        camera_center = [image.shape[1] / 2., image.shape[0] / 2.]
        camera = pyrender.IntrinsicsCamera(fx=focal_length, fy=focal_length,
                                           cx=camera_center[0], cy=camera_center[1])
        scene.add(camera, pose=camera_pose)


        light_nodes = create_raymond_lights()
        for node in light_nodes:
            scene.add_node(node)

        color, rend_depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
        color = color.astype(np.float32) / 255.0
        # Alpha > 0 marks pixels covered by the mesh.
        valid_mask = (color[:, :, -1] > 0)[:, :, np.newaxis]
        if not side_view:
            # Composite the render over the input image using the alpha mask.
            output_img = (color[:, :, :3] * valid_mask +
                          (1 - valid_mask) * image)
        else:
            output_img = color[:, :, :3]
        if resize is not None:
            output_img = cv2.resize(output_img, resize)

        output_img = output_img.astype(np.float32)
        # Release the EGL/offscreen resources allocated for this call.
        renderer.delete()
        return output_img
diff --git a/WiLoR/wilor/utils/misc.py b/WiLoR/wilor/utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a991cc3228fd94cf64ba0c1f80c89a544767028
--- /dev/null
+++ b/WiLoR/wilor/utils/misc.py
@@ -0,0 +1,203 @@
+import time
+import warnings
+from importlib.util import find_spec
+from pathlib import Path
+from typing import Callable, List
+
+import hydra
+from omegaconf import DictConfig, OmegaConf
+from pytorch_lightning import Callback
+from pytorch_lightning.loggers import Logger
+from pytorch_lightning.utilities import rank_zero_only
+
+from . import pylogger, rich_utils
+
+log = pylogger.get_pylogger(__name__)
+
+
def task_wrapper(task_func: Callable) -> Callable:
    """Optional decorator that wraps the task function in extra utilities.

    Makes multirun more resistant to failure.

    Utilities:
    - Calling the `utils.extras()` before the task is started
    - Calling the `utils.close_loggers()` after the task is finished
    - Logging the exception if occurs
    - Logging the task total execution time
    - Logging the output dir
    """

    def wrap(cfg: DictConfig):

        # apply extra utilities
        extras(cfg)

        # execute the task
        try:
            start_time = time.time()
            ret = task_func(cfg=cfg)
        except Exception as ex:
            log.exception("") # save exception to `.log` file
            # Re-raise so the caller (and hydra multirun) still sees the failure.
            raise ex
        finally:
            # Runs on success and failure alike: record timing, then close
            # loggers so a crashed run cannot wedge subsequent multirun jobs.
            path = Path(cfg.paths.output_dir, "exec_time.log")
            content = f"'{cfg.task_name}' execution time: {time.time() - start_time} (s)"
            save_file(path, content) # save task execution time (even if exception occurs)
            close_loggers() # close loggers (even if exception occurs so multirun won't fail)

        log.info(f"Output dir: {cfg.paths.output_dir}")

        return ret

    return wrap
+
+
def extras(cfg: DictConfig) -> None:
    """Applies optional utilities before the task is started.

    Utilities:
    - Ignoring python warnings
    - Setting tags from command line
    - Rich config printing
    """

    # return if no `extras` config
    if not cfg.get("extras"):
        # Fix: the original literal was split across two physical lines (a
        # mangled character inside the string), which is a syntax error.
        log.warning("Extras config not found!")
        return

    # disable python warnings
    if cfg.extras.get("ignore_warnings"):
        log.info("Disabling python warnings! ")
        warnings.filterwarnings("ignore")

    # prompt user to input tags from command line if none are provided in the config
    if cfg.extras.get("enforce_tags"):
        log.info("Enforcing tags! ")
        rich_utils.enforce_tags(cfg, save_to_file=True)

    # pretty print config tree using Rich library
    if cfg.extras.get("print_config"):
        log.info("Printing config tree with Rich! ")
        rich_utils.print_config_tree(cfg, resolve=True, save_to_file=True)
+
+
@rank_zero_only
def save_file(path: str, content: str) -> None:
    """Write `content` to `path` from the rank-zero process only (no-op on other ranks)."""
    with open(path, "w+") as fh:
        fh.write(content)
+
+
def instantiate_callbacks(callbacks_cfg: DictConfig) -> List[Callback]:
    """Instantiate every callback whose config declares a `_target_` key."""
    if not callbacks_cfg:
        log.warning("Callbacks config is empty.")
        return []

    if not isinstance(callbacks_cfg, DictConfig):
        raise TypeError("Callbacks config must be a DictConfig!")

    callbacks: List[Callback] = []
    # Keys are ignored; only sub-configs with a hydra target are instantiated.
    for cb_conf in callbacks_cfg.values():
        if isinstance(cb_conf, DictConfig) and "_target_" in cb_conf:
            log.info(f"Instantiating callback <{cb_conf._target_}>")
            callbacks.append(hydra.utils.instantiate(cb_conf))
    return callbacks
+
+
def instantiate_loggers(logger_cfg: DictConfig) -> List[Logger]:
    """Instantiate every logger whose config declares a `_target_` key."""
    if not logger_cfg:
        log.warning("Logger config is empty.")
        return []

    if not isinstance(logger_cfg, DictConfig):
        raise TypeError("Logger config must be a DictConfig!")

    logger: List[Logger] = []
    # Keys are ignored; only sub-configs with a hydra target are instantiated.
    for lg_conf in logger_cfg.values():
        if isinstance(lg_conf, DictConfig) and "_target_" in lg_conf:
            log.info(f"Instantiating logger <{lg_conf._target_}>")
            logger.append(hydra.utils.instantiate(lg_conf))
    return logger
+
+
@rank_zero_only
def log_hyperparameters(object_dict: dict) -> None:
    """Controls which config parts are saved by lightning loggers.

    Additionally saves:
    - Number of model parameters

    Args:
        object_dict: expects keys "cfg" (DictConfig), "model" (with
            .parameters()) and "trainer" (with .logger).
    """

    hparams = {}

    cfg = object_dict["cfg"]
    model = object_dict["model"]
    trainer = object_dict["trainer"]

    if not trainer.logger:
        log.warning("Logger not found! Skipping hyperparameter logging...")
        return

    # save number of model parameters
    hparams["model/params/total"] = sum(p.numel() for p in model.parameters())
    hparams["model/params/trainable"] = sum(
        p.numel() for p in model.parameters() if p.requires_grad
    )
    hparams["model/params/non_trainable"] = sum(
        p.numel() for p in model.parameters() if not p.requires_grad
    )

    # Copy every top-level config entry into the logged hyperparameters.
    for k in cfg.keys():
        hparams[k] = cfg.get(k)

    # Resolve all interpolations
    def _resolve(_cfg):
        # Convert DictConfig subtrees to plain containers, resolving ${...} refs.
        if isinstance(_cfg, DictConfig):
            _cfg = OmegaConf.to_container(_cfg, resolve=True)
        return _cfg

    hparams = {k: _resolve(v) for k, v in hparams.items()}

    # send hparams to all loggers
    trainer.logger.log_hyperparams(hparams)
+
+
def get_metric_value(metric_dict: dict, metric_name: str) -> "float | None":
    """Safely retrieves value of the metric logged in LightningModule.

    Returns None when `metric_name` is falsy; raises when the metric is
    missing from `metric_dict`.
    """

    if not metric_name:
        log.info("Metric name is None! Skipping metric value retrieval...")
        return None

    if metric_name not in metric_dict:
        raise Exception(
            f"Metric value not found! \n"
            "Make sure metric name logged in LightningModule is correct!\n"
            "Make sure `optimized_metric` name in `hparams_search` config is correct!"
        )

    # .item() converts the logged 0-dim tensor to a plain Python number.
    metric_value = metric_dict[metric_name].item()
    log.info(f"Retrieved metric value! <{metric_name}={metric_value}>")

    return metric_value
+
+
def close_loggers() -> None:
    """Makes sure all loggers closed properly (prevents logging failure during multirun)."""

    log.info("Closing loggers...")

    # wandb keeps a global run open; finish it so the next multirun job starts clean.
    if find_spec("wandb") is None:  # skip entirely if wandb is not installed
        return
    import wandb

    if wandb.run:
        log.info("Closing wandb!")
        wandb.finish()
diff --git a/WiLoR/wilor/utils/pose_utils.py b/WiLoR/wilor/utils/pose_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8d5855d272f495cd08730fd0625a9ed125a5e09
--- /dev/null
+++ b/WiLoR/wilor/utils/pose_utils.py
@@ -0,0 +1,352 @@
+"""
+Code adapted from: https://github.com/akanazawa/hmr/blob/master/src/benchmark/eval_util.py
+"""
+
+import torch
+import numpy as np
+from typing import Optional, Dict, List, Tuple
+
def compute_similarity_transform(S1: torch.Tensor, S2: torch.Tensor) -> torch.Tensor:
    """
    Computes a similarity transform (sR, t) in a batched way that takes
    a set of 3D points S1 (B, N, 3) closest to a set of 3D points S2 (B, N, 3),
    where R is a 3x3 rotation matrix, t 3x1 translation, s scale.
    i.e. solves the orthogonal Procrutes problem.
    Args:
        S1 (torch.Tensor): First set of points of shape (B, N, 3).
        S2 (torch.Tensor): Second set of points of shape (B, N, 3).
    Returns:
        (torch.Tensor): The first set of points after applying the similarity transformation.
    """

    B = S1.shape[0]
    # Work in (B, 3, N) layout so each point is a column vector.
    P1 = S1.permute(0, 2, 1)
    P2 = S2.permute(0, 2, 1)

    # Center both point sets.
    mu1 = P1.mean(dim=2, keepdim=True)
    mu2 = P2.mean(dim=2, keepdim=True)
    X1 = P1 - mu1
    X2 = P2 - mu2

    # Total variance of the centered source set (used to recover the scale).
    var1 = (X1 ** 2).sum(dim=(1, 2))

    # Cross-covariance between the two centered sets.
    K = X1 @ X2.permute(0, 2, 1)

    # The rotation maximizing trace(R'K) comes from the SVD of K.
    U, s, V = torch.svd(K)
    Vh = V.permute(0, 2, 1)

    # Z flips the last singular direction when needed so that det(R) = +1.
    Z = torch.eye(U.shape[1], device=U.device).unsqueeze(0).repeat(B, 1, 1)
    Z[:, -1, -1] *= torch.sign(torch.linalg.det(U @ Vh))

    R = V @ Z @ U.permute(0, 2, 1)

    # Optimal scale from the trace of R K over the source variance.
    trace = (R @ K).diagonal(offset=0, dim1=-1, dim2=-2).sum(dim=-1)
    scale = (trace / var1).reshape(B, 1, 1)

    # Translation aligns the transformed centroid with the target centroid.
    t = mu2 - scale * (R @ mu1)

    # Apply the full similarity transform and restore (B, N, 3) layout.
    aligned = scale * (R @ P1) + t
    return aligned.permute(0, 2, 1)
+
def reconstruction_error(S1, S2) -> np.array:
    """
    Computes the mean Euclidean distance of 2 set of points S1, S2 after performing Procrustes alignment.
    Args:
        S1 (torch.Tensor): First set of points of shape (B, N, 3).
        S2 (torch.Tensor): Second set of points of shape (B, N, 3).
    Returns:
        (np.array): Reconstruction error.
    """
    # Align S1 onto S2, then average the per-point Euclidean distances.
    aligned = compute_similarity_transform(S1, S2)
    return torch.linalg.norm(aligned - S2, dim=-1).mean(dim=-1)
+
def eval_pose(pred_joints, gt_joints) -> Tuple[np.array, np.array]:
    """
    Compute joint errors in mm before and after Procrustes alignment.
    Args:
        pred_joints (torch.Tensor): Predicted 3D joints of shape (B, N, 3).
        gt_joints (torch.Tensor): Ground truth 3D joints of shape (B, N, 3).
    Returns:
        Tuple[np.array, np.array]: Joint errors in mm before and after alignment.
    """
    # MPJPE: mean per-joint position error without any alignment.
    mpjpe = torch.linalg.norm(pred_joints - gt_joints, dim=-1).mean(dim=-1).cpu().numpy()

    # PA-MPJPE: error after Procrustes alignment of the prediction.
    r_error = reconstruction_error(pred_joints, gt_joints).cpu().numpy()

    # Convert from meters to millimeters.
    return 1000 * mpjpe, 1000 * r_error
+
class Evaluator:
    """Accumulates per-sample 3D pose metrics (and optionally 2D PCK) over a dataset, one batch at a time."""

    def __init__(self,
                 dataset_length: int,
                 dataset: str,
                 keypoint_list: List,
                 pelvis_ind: int,
                 metrics: List = ['mode_mpjpe', 'mode_re', 'min_mpjpe', 'min_re'],
                 preds: List = ['vertices', 'keypoints_3d'],
                 pck_thresholds: Optional[List] = None):
        """
        Class used for evaluating trained models on different 3D pose datasets.
        Args:
            dataset_length (int): Total dataset length.
            dataset (str): Dataset identifier (e.g. 'HO3D-VAL'); controls joint reordering when exporting predictions.
            keypoint_list [List]: List of keypoints used for evaluation.
            pelvis_ind (int): Index of pelvis keypoint; used for aligning the predictions and ground truth.
            metrics [List]: List of evaluation metrics to record.
            preds [List]: Which raw predictions to buffer ('vertices' and/or 'keypoints_3d').
            pck_thresholds (Optional[List]): If given, a 2D PCK evaluator runs alongside the 3D metrics.
        """
        # NOTE(review): metrics/preds use mutable list defaults; harmless here since
        # they are only iterated, but None-plus-in-body-default would be safer.
        self.dataset_length = dataset_length
        self.dataset = dataset
        self.keypoint_list = keypoint_list
        self.pelvis_ind = pelvis_ind
        self.metrics = metrics
        self.preds = preds
        # One pre-allocated running array per requested metric, sized for the full dataset.
        if self.metrics is not None:
            for metric in self.metrics:
                setattr(self, metric, np.zeros((dataset_length,)))
        # Pre-allocated buffers for raw predictions (778 vertices / 21 joints per sample).
        if self.preds is not None:
            for pred in self.preds:
                if pred == 'vertices':
                    self.vertices = np.zeros((dataset_length, 778, 3))
                if pred == 'keypoints_3d':
                    self.keypoints_3d = np.zeros((dataset_length, 21, 3))
        # Number of samples accumulated so far; also the write offset into the arrays above.
        self.counter = 0
        if pck_thresholds is None:
            self.pck_evaluator = None
        else:
            self.pck_evaluator = EvaluatorPCK(pck_thresholds)

    def log(self):
        """
        Print current evaluation metrics
        """
        if self.counter == 0:
            print('Evaluation has not started')
            return
        print(f'{self.counter} / {self.dataset_length} samples')
        if self.pck_evaluator is not None:
            self.pck_evaluator.log()
        if self.metrics is not None:
            for metric in self.metrics:
                # Joint-error metrics are stored in mm (scaled inside eval_pose).
                if metric in ['mode_mpjpe', 'mode_re', 'min_mpjpe', 'min_re']:
                    unit = 'mm'
                else:
                    unit = ''
                # Average only over the samples accumulated so far.
                print(f'{metric}: {getattr(self, metric)[:self.counter].mean()} {unit}')
        print('***')

    def get_metrics_dict(self) -> Dict:
        """
        Returns:
            Dict: Dictionary of evaluation metrics.
        """
        # Mean of each metric over the samples seen so far.
        d1 = {metric: getattr(self, metric)[:self.counter].mean() for metric in self.metrics}
        if self.pck_evaluator is not None:
            d2 = self.pck_evaluator.get_metrics_dict()
            d1.update(d2)
        return d1

    def get_preds_dict(self) -> Dict:
        """
        Returns:
            Dict: Dictionary of evaluation preds.
        """
        # Raw prediction buffers, truncated to the samples actually filled.
        d1 = {pred: getattr(self, pred)[:self.counter] for pred in self.preds}
        return d1

    def __call__(self, output: Dict, batch: Dict, opt_output: Optional[Dict] = None):
        """
        Evaluate current batch.
        Args:
            output (Dict): Regression output.
            batch (Dict): Dictionary containing images and their corresponding annotations.
            opt_output (Dict): Optimization output.
        """
        if self.pck_evaluator is not None:
            self.pck_evaluator(output, batch, opt_output)

        pred_keypoints_3d = output['pred_keypoints_3d'].detach()
        # Insert a singleton "samples" axis: (B, 1, N, 3). The code below is written
        # for multi-sample predictions; here num_samples is always 1.
        pred_keypoints_3d = pred_keypoints_3d[:,None,:,:]
        batch_size = pred_keypoints_3d.shape[0]
        num_samples = pred_keypoints_3d.shape[1]
        # Drop the confidence column from the GT keypoints and tile across samples.
        gt_keypoints_3d = batch['keypoints_3d'][:, :, :-1].unsqueeze(1).repeat(1, num_samples, 1, 1)
        pred_vertices = output['pred_vertices'].detach()

        # Align predictions and ground truth such that the pelvis location is at the origin
        pred_keypoints_3d -= pred_keypoints_3d[:, :, [self.pelvis_ind]]
        gt_keypoints_3d -= gt_keypoints_3d[:, :, [self.pelvis_ind]]

        # Compute joint errors
        mpjpe, re = eval_pose(pred_keypoints_3d.reshape(batch_size * num_samples, -1, 3)[:, self.keypoint_list], gt_keypoints_3d.reshape(batch_size * num_samples, -1 ,3)[:, self.keypoint_list])
        mpjpe = mpjpe.reshape(batch_size, num_samples)
        re = re.reshape(batch_size, num_samples)

        # Compute 2d keypoint errors
        # Undo the crop expansion so the 2D error is measured in tight-box units.
        bbox_expand_factor = batch['bbox_expand_factor'][:,None,None,None].detach()
        pred_keypoints_2d = output['pred_keypoints_2d'].detach()
        pred_keypoints_2d = pred_keypoints_2d[:,None,:,:]*bbox_expand_factor
        gt_keypoints_2d = batch['keypoints_2d'][:,None,:,:].repeat(1, num_samples, 1, 1)*bbox_expand_factor
        # Last GT channel is per-keypoint confidence, used to weight the error.
        conf = gt_keypoints_2d[:, :, :, -1].clone()
        kp_err = torch.nn.functional.mse_loss(
            pred_keypoints_2d,
            gt_keypoints_2d[:, :, :, :-1],
            reduction='none'
        ).sum(dim=3)
        kp_l2_loss = (conf * kp_err).mean(dim=2)
        kp_l2_loss = kp_l2_loss.detach().cpu().numpy()

        # Compute joint errors after optimization, if available.
        if opt_output is not None:
            opt_keypoints_3d = opt_output['model_joints']
            opt_keypoints_3d -= opt_keypoints_3d[:, [self.pelvis_ind]]
            opt_mpjpe, opt_re = eval_pose(opt_keypoints_3d[:, self.keypoint_list], gt_keypoints_3d[:, 0, self.keypoint_list])

        # The 0-th sample always corresponds to the mode
        if hasattr(self, 'mode_mpjpe'):
            mode_mpjpe = mpjpe[:, 0]
            self.mode_mpjpe[self.counter:self.counter+batch_size] = mode_mpjpe
        if hasattr(self, 'mode_re'):
            mode_re = re[:, 0]
            self.mode_re[self.counter:self.counter+batch_size] = mode_re
        if hasattr(self, 'mode_kpl2'):
            mode_kpl2 = kp_l2_loss[:, 0]
            self.mode_kpl2[self.counter:self.counter+batch_size] = mode_kpl2
        if hasattr(self, 'min_mpjpe'):
            min_mpjpe = mpjpe.min(axis=-1)
            self.min_mpjpe[self.counter:self.counter+batch_size] = min_mpjpe
        if hasattr(self, 'min_re'):
            min_re = re.min(axis=-1)
            self.min_re[self.counter:self.counter+batch_size] = min_re
        if hasattr(self, 'min_kpl2'):
            min_kpl2 = kp_l2_loss.min(axis=-1)
            self.min_kpl2[self.counter:self.counter+batch_size] = min_kpl2
        # NOTE(review): if 'opt_mpjpe'/'opt_re' are in metrics but opt_output is None,
        # the names below are unbound and this raises NameError — confirm callers
        # always pass opt_output when requesting these metrics.
        if hasattr(self, 'opt_mpjpe'):
            self.opt_mpjpe[self.counter:self.counter+batch_size] = opt_mpjpe
        if hasattr(self, 'opt_re'):
            self.opt_re[self.counter:self.counter+batch_size] = opt_re
        if hasattr(self, 'vertices'):
            self.vertices[self.counter:self.counter+batch_size] = pred_vertices.cpu().numpy()
        if hasattr(self, 'keypoints_3d'):
            if self.dataset == 'HO3D-VAL':
                # Reorder joints into the HO3D evaluation convention.
                pred_keypoints_3d = pred_keypoints_3d[:,:,[0,5,6,7,9,10,11,17,18,19,13,14,15,1,2,3,4,8,12,16,20]]
            # NOTE(review): squeeze() also drops the batch dimension when
            # batch_size == 1; squeeze(1) would be safer — confirm.
            self.keypoints_3d[self.counter:self.counter+batch_size] = pred_keypoints_3d.squeeze().cpu().numpy()

        self.counter += batch_size

        if hasattr(self, 'mode_mpjpe') and hasattr(self, 'mode_re'):
            return {
                'mode_mpjpe': mode_mpjpe,
                'mode_re': mode_re,
            }
        else:
            return {}
+
+
class EvaluatorPCK:
    """Accumulates 2D keypoints across batches and computes PCK at several thresholds."""

    def __init__(self, thresholds: List = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5],):
        """
        Class used for evaluating trained models on different 3D pose datasets.
        Args:
            thresholds [List]: List of PCK thresholds to evaluate.
            metrics [List]: List of evaluation metrics to record.
        """
        self.thresholds = thresholds
        # Per-batch buffers; concatenated lazily in compute_pcks().
        self.pred_kp_2d = []
        self.gt_kp_2d = []
        self.gt_conf_2d = []
        self.scale = []
        self.counter = 0

    def log(self):
        """
        Print current evaluation metrics
        """
        if self.counter == 0:
            print('Evaluation has not started')
            return
        print(f'{self.counter} samples')
        metrics_dict = self.get_metrics_dict()
        for metric in metrics_dict:
            print(f'{metric}: {metrics_dict[metric]}')
        print('***')

    def get_metrics_dict(self) -> Dict:
        """
        Returns:
            Dict: Dictionary of evaluation metrics.
        """
        pcks = self.compute_pcks()
        metrics = {}
        for thr, (acc,avg_acc,cnt) in zip(self.thresholds, pcks):
            # One entry per keypoint (negative accuracies are skipped) plus the mean.
            metrics.update({f'kp{i}_pck_{thr}': float(a) for i, a in enumerate(acc) if a>=0})
            metrics.update({f'kpAvg_pck_{thr}': float(avg_acc)})
        return metrics

    def compute_pcks(self):
        # Concatenate all buffered batches into (num_samples, 1, num_kps, ...) arrays.
        pred_kp_2d = np.concatenate(self.pred_kp_2d, axis=0)
        gt_kp_2d = np.concatenate(self.gt_kp_2d, axis=0)
        gt_conf_2d = np.concatenate(self.gt_conf_2d, axis=0)
        scale = np.concatenate(self.scale, axis=0)
        assert pred_kp_2d.shape == gt_kp_2d.shape
        assert pred_kp_2d[..., 0].shape == gt_conf_2d.shape
        assert pred_kp_2d.shape[1] == 1 # num_samples
        assert scale.shape[0] == gt_conf_2d.shape[0] # num_samples

        # One PCK computation per threshold; per-sample bbox size normalizes distances.
        pcks = [
            self.keypoint_pck_accuracy(
                pred_kp_2d[:, 0, :, :],
                gt_kp_2d[:, 0, :, :],
                gt_conf_2d[:, 0, :]>0.5,
                thr=thr,
                scale = scale[:,None]
            )
            for thr in self.thresholds
        ]
        return pcks

    def keypoint_pck_accuracy(self, pred, gt, conf, thr, scale):
        # Per-joint Euclidean distance in pixels, shape (num_samples, num_joints).
        dist = np.sqrt(np.sum((pred-gt)**2, axis=2))
        # conf arrives as a boolean mask from compute_pcks; >0.5 keeps it boolean.
        all_joints = conf>0.5
        # A joint is correct if its distance is within thr * (per-sample bbox scale).
        correct_joints = np.logical_and(dist<=scale*thr, all_joints)
        pck = correct_joints.sum(axis=0)/all_joints.sum(axis=0)
        # Returns per-joint PCK, mean PCK over joints, and the joint count.
        return pck, pck.mean(), pck.shape[0]

    def __call__(self, output: Dict, batch: Dict, opt_output: Optional[Dict] = None):
        """
        Evaluate current batch.
        Args:
            output (Dict): Regression output.
            batch (Dict): Dictionary containing images and their corresponding annotations.
            opt_output (Dict): Optimization output.
        """
        pred_keypoints_2d = output['pred_keypoints_2d'].detach()
        num_samples = 1
        batch_size = pred_keypoints_2d.shape[0]

        # Mirror the x-coordinate for left hands (right == 0 maps to factor -1).
        # NOTE(review): detach() shares storage, so this also writes back into
        # output['pred_keypoints_2d'] — confirm callers do not reuse it afterwards.
        right = batch['right'].detach()
        pred_keypoints_2d[:,:,0] = (2*right[:,None]-1)*pred_keypoints_2d[:,:,0]
        box_size = batch['box_size'].detach()
        box_center = batch['box_center'].detach()
        bbox_expand_factor = batch['bbox_expand_factor'].detach()
        # PCK normalization scale: the tight box size before expansion.
        scale = box_size/bbox_expand_factor
        # NOTE(review): this reshaped bbox_expand_factor is not used below.
        bbox_expand_factor = bbox_expand_factor[:,None,None,None]
        # Map normalized crop coordinates back to original-image pixel coordinates.
        pred_keypoints_2d = pred_keypoints_2d*box_size[:,None,None]+box_center[:,None]
        pred_keypoints_2d = pred_keypoints_2d[:,None,:,:]
        gt_keypoints_2d = batch['orig_keypoints_2d'][:,None,:,:].repeat(1, num_samples, 1, 1)

        self.pred_kp_2d.append(pred_keypoints_2d[:, :, :, :2].detach().cpu().numpy())
        self.gt_conf_2d.append(gt_keypoints_2d[:, :, :, -1].detach().cpu().numpy())
        self.gt_kp_2d.append(gt_keypoints_2d[:, :, :, :2].detach().cpu().numpy())
        self.scale.append(scale.detach().cpu().numpy())

        self.counter += batch_size
\ No newline at end of file
diff --git a/WiLoR/wilor/utils/pylogger.py b/WiLoR/wilor/utils/pylogger.py
new file mode 100644
index 0000000000000000000000000000000000000000..68ea727c280fac2f90f71a07718b6050a568ecfb
--- /dev/null
+++ b/WiLoR/wilor/utils/pylogger.py
@@ -0,0 +1,17 @@
+import logging
+
+from pytorch_lightning.utilities import rank_zero_only
+
+
def get_pylogger(name=__name__) -> logging.Logger:
    """Initializes multi-GPU-friendly python command line logger."""
    logger = logging.getLogger(name)

    # Wrap every logging method with the rank-zero decorator so that in a
    # multi-GPU run only process 0 emits messages; otherwise each process
    # would duplicate every log line.
    for method_name in ("debug", "info", "warning", "error", "exception", "fatal", "critical"):
        setattr(logger, method_name, rank_zero_only(getattr(logger, method_name)))

    return logger
diff --git a/WiLoR/wilor/utils/render_openpose.py b/WiLoR/wilor/utils/render_openpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..23af8cf5ec74e1a79465f870794ff0be85c6bd45
--- /dev/null
+++ b/WiLoR/wilor/utils/render_openpose.py
@@ -0,0 +1,191 @@
+"""
+Render OpenPose keypoints.
+Code was ported to Python from the official C++ implementation https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/utilities/keypoint.cpp
+"""
+import cv2
+import math
+import numpy as np
+from typing import List, Tuple
+
def get_keypoints_rectangle(keypoints: np.array, threshold: float) -> Tuple[float, float, float]:
    """
    Compute rectangle enclosing keypoints above the threshold.
    Args:
        keypoints (np.array): Keypoint array of shape (N, 3).
        threshold (float): Confidence visualization threshold.
    Returns:
        Tuple[float, float, float]: Rectangle width, height and area.
    """
    confident = keypoints[:, -1] > threshold
    if not confident.any():
        # No keypoint passes the confidence threshold: degenerate rectangle.
        return 0, 0, 0
    # Axis-aligned bounding box of the confident (x, y) coordinates.
    xy = keypoints[confident][:, :-1]
    mins = xy.min(axis=0)
    maxs = xy.max(axis=0)
    width = maxs[0] - mins[0]
    height = maxs[1] - mins[1]
    return width, height, width * height
+
def render_keypoints(img: np.array,
                     keypoints: np.array,
                     pairs: List,
                     colors: List,
                     thickness_circle_ratio: float,
                     thickness_line_ratio_wrt_circle: float,
                     pose_scales: List,
                     threshold: float = 0.1,
                     alpha: float = 1.0) -> np.array:
    """
    Render keypoints on input image.
    Args:
        img (np.array): Input image of shape (H, W, 3) with pixel values in the [0,255] range.
        keypoints (np.array): Keypoint array of shape (N, 3).
        pairs (List): List of keypoint pairs per limb.
        colors: (List): List of colors per keypoint.
        thickness_circle_ratio (float): Circle thickness ratio.
        thickness_line_ratio_wrt_circle (float): Line thickness ratio wrt the circle.
        pose_scales (List): List of pose scales.
        threshold (float): Only visualize keypoints with confidence above the threshold.
        alpha (float): Unused; kept for interface compatibility.
    Returns:
        (np.array): Image of shape (H, W, 3) with keypoints drawn on top of the original image.
    """
    # BUG FIX: img is (H, W, 3), so height is shape[0] and width is shape[1].
    # The previous code read img.shape[2] (the channel count, always 3) as the
    # height, which corrupted the area/ratio-based stroke scaling below.
    width, height = img.shape[1], img.shape[0]
    area = width * height

    lineType = 8
    shift = 0
    numberColors = len(colors)
    thresholdRectangle = 0.1

    person_width, person_height, person_area = get_keypoints_rectangle(keypoints, thresholdRectangle)
    if person_area > 0:
        # Stroke widths scale with the person's size relative to the image.
        ratioAreas = min(1, max(person_width / width, person_height / height))
        thicknessRatio = np.maximum(np.round(math.sqrt(area) * thickness_circle_ratio * ratioAreas), 2)
        thicknessCircle = np.maximum(1, thicknessRatio if ratioAreas > 0.05 else -np.ones_like(thicknessRatio))
        thicknessLine = np.maximum(1, np.round(thicknessRatio * thickness_line_ratio_wrt_circle))
        radius = thicknessRatio / 2

        img = np.ascontiguousarray(img.copy())
        # Draw limbs: one line per pair, only when both endpoints are confident.
        for i, pair in enumerate(pairs):
            index1, index2 = pair
            if keypoints[index1, -1] > threshold and keypoints[index2, -1] > threshold:
                thicknessLineScaled = int(round(min(thicknessLine[index1], thicknessLine[index2]) * pose_scales[0]))
                colorIndex = index2
                color = colors[colorIndex % numberColors]
                keypoint1 = keypoints[index1, :-1].astype(np.int_)
                keypoint2 = keypoints[index2, :-1].astype(np.int_)
                cv2.line(img, tuple(keypoint1.tolist()), tuple(keypoint2.tolist()), tuple(color.tolist()), thicknessLineScaled, lineType, shift)
        # Draw joints: one filled circle per confident keypoint.
        for part in range(len(keypoints)):
            faceIndex = part
            if keypoints[faceIndex, -1] > threshold:
                radiusScaled = int(round(radius[faceIndex] * pose_scales[0]))
                thicknessCircleScaled = int(round(thicknessCircle[faceIndex] * pose_scales[0]))
                colorIndex = part
                color = colors[colorIndex % numberColors]
                center = keypoints[faceIndex, :-1].astype(np.int_)
                cv2.circle(img, tuple(center.tolist()), radiusScaled, tuple(color.tolist()), thicknessCircleScaled, lineType, shift)
    return img
+
def render_hand_keypoints(img, right_hand_keypoints, threshold=0.1, use_confidence=False, map_fn=lambda x: np.ones_like(x), alpha=1.0):
    """Draw the 21-keypoint hand skeleton on img and return the result."""
    # Per-keypoint circle-thickness ratio, optionally modulated by confidence.
    if use_confidence and map_fn is not None:
        thickness_circle_ratio_right = 1./50 * map_fn(right_hand_keypoints[:, -1])
    else:
        thickness_circle_ratio_right = 1./50 * np.ones(right_hand_keypoints.shape[0])
    thickness_line_ratio_wrt_circle = 0.75

    # Skeleton topology: wrist (0) connected to four joints per finger.
    pairs = np.array([
        0, 1,   1, 2,   2, 3,   3, 4,
        0, 5,   5, 6,   6, 7,   7, 8,
        0, 9,   9, 10,  10, 11, 11, 12,
        0, 13,  13, 14, 14, 15, 15, 16,
        0, 17,  17, 18, 18, 19, 19, 20,
    ]).reshape(-1, 2)

    # One RGB color per keypoint: grey wrist, then one color ramp per finger.
    colors = np.array([
        [100., 100., 100.],
        [100., 0., 0.],
        [150., 0., 0.],
        [200., 0., 0.],
        [255., 0., 0.],
        [100., 100., 0.],
        [150., 150., 0.],
        [200., 200., 0.],
        [255., 255., 0.],
        [0., 100., 50.],
        [0., 150., 75.],
        [0., 200., 100.],
        [0., 255., 125.],
        [0., 50., 100.],
        [0., 75., 150.],
        [0., 100., 200.],
        [0., 125., 255.],
        [100., 0., 100.],
        [150., 0., 150.],
        [200., 0., 200.],
        [255., 0., 255.],
    ])
    pose_scales = [1]
    return render_keypoints(img, right_hand_keypoints, pairs, colors,
                            thickness_circle_ratio_right, thickness_line_ratio_wrt_circle,
                            pose_scales, threshold, alpha=alpha)
+
def render_body_keypoints(img: np.array,
                          body_keypoints: np.array) -> np.array:
    """
    Render OpenPose body keypoints on input image.
    Args:
        img (np.array): Input image of shape (H, W, 3) with pixel values in the [0,255] range.
        body_keypoints (np.array): Keypoint array of shape (N, 3); 3 <====> (x, y, confidence).
    Returns:
        (np.array): Image of shape (H, W, 3) with keypoints drawn on top of the original image.
    """
    # Uniform circle-thickness ratio for every joint.
    thickness_circle_ratio = 1./75. * np.ones(body_keypoints.shape[0])
    thickness_line_ratio_wrt_circle = 0.75
    # Limb connectivity for the 25-keypoint body layout (presumably BODY_25).
    pairs = np.array([
        1, 8,   1, 2,   1, 5,   2, 3,   3, 4,   5, 6,   6, 7,
        8, 9,   9, 10,  10, 11, 8, 12,  12, 13, 13, 14,
        1, 0,   0, 15,  15, 17, 0, 16,  16, 18,
        14, 19, 19, 20, 14, 21, 11, 22, 22, 23, 11, 24,
    ]).reshape(-1, 2)
    # Standard OpenPose per-keypoint colors.
    colors = np.array([
        [255., 0., 85.],
        [255., 0., 0.],
        [255., 85., 0.],
        [255., 170., 0.],
        [255., 255., 0.],
        [170., 255., 0.],
        [85., 255., 0.],
        [0., 255., 0.],
        [255., 0., 0.],
        [0., 255., 85.],
        [0., 255., 170.],
        [0., 255., 255.],
        [0., 170., 255.],
        [0., 85., 255.],
        [0., 0., 255.],
        [255., 0., 170.],
        [170., 0., 255.],
        [255., 0., 255.],
        [85., 0., 255.],
        [0., 0., 255.],
        [0., 0., 255.],
        [0., 0., 255.],
        [0., 255., 255.],
        [0., 255., 255.],
        [0., 255., 255.],
    ])
    pose_scales = [1]
    return render_keypoints(img, body_keypoints, pairs, colors, thickness_circle_ratio, thickness_line_ratio_wrt_circle, pose_scales, 0.1)
+
def render_openpose(img: np.array,
                    hand_keypoints: np.array) -> np.array:
    """
    Render keypoints in the OpenPose format on input image.
    Args:
        img (np.array): Input image of shape (H, W, 3) with pixel values in the [0,255] range.
        hand_keypoints (np.array): Keypoint array of shape (N, 3); 3 <====> (x, y, confidence).
    Returns:
        (np.array): Image of shape (H, W, 3) with keypoints drawn on top of the original image.
    """
    # Body rendering is disabled in this codebase; only the hand skeleton is drawn.
    return render_hand_keypoints(img, hand_keypoints)
diff --git a/WiLoR/wilor/utils/renderer.py b/WiLoR/wilor/utils/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a7aafd58841277b6e0e335daf0965160d34b745
--- /dev/null
+++ b/WiLoR/wilor/utils/renderer.py
@@ -0,0 +1,423 @@
+import os
+if 'PYOPENGL_PLATFORM' not in os.environ:
+ os.environ['PYOPENGL_PLATFORM'] = 'egl'
+import torch
+import numpy as np
+import pyrender
+import trimesh
+import cv2
+from yacs.config import CfgNode
+from typing import List, Optional
+
def cam_crop_to_full(cam_bbox, box_center, box_size, img_size, focal_length=5000.):
    """
    Convert a crop-space weak-perspective camera to a full-image translation.
    Args:
        cam_bbox (torch.Tensor): (B, 3) camera [scale, tx, ty] in crop coordinates.
        box_center (torch.Tensor): (B, 2) crop center in full-image pixels.
        box_size (torch.Tensor): (B,) crop size in pixels.
        img_size (torch.Tensor): (B, 2) full image (width, height).
        focal_length (float): Assumed focal length in pixels.
    Returns:
        torch.Tensor: (B, 3) translation [tx, ty, tz] in the full image frame.
    """
    half_w = img_size[:, 0] / 2.
    half_h = img_size[:, 1] / 2.
    # Effective projected box scale; epsilon guards against division by zero.
    scaled_b = box_size * cam_bbox[:, 0] + 1e-9
    depth = 2 * focal_length / scaled_b
    shift_x = (2 * (box_center[:, 0] - half_w) / scaled_b) + cam_bbox[:, 1]
    shift_y = (2 * (box_center[:, 1] - half_h) / scaled_b) + cam_bbox[:, 2]
    return torch.stack([shift_x, shift_y, depth], dim=-1)
+
def get_light_poses(n_lights=5, elevation=np.pi / 3, dist=12):
    """Return 4x4 poses for n_lights placed on a circle around the origin at the given elevation."""
    # Shared translation along +z; each light differs only by its rotation.
    trans = make_translation(torch.tensor([0, 0, dist]))
    poses = []
    for idx in range(n_lights):
        phi = 2 * np.pi * idx / n_lights
        rot = make_rotation(rx=-elevation, ry=phi, order="xyz")
        poses.append((rot @ trans).numpy())
    return poses
+
def make_translation(t):
    # Pure translation as a 4x4 homogeneous pose (identity rotation).
    return make_4x4_pose(torch.eye(3), t)
+
def make_rotation(rx=0, ry=0, rz=0, order="xyz"):
    """
    Build a 4x4 homogeneous rotation pose from per-axis angles (radians).
    Args:
        rx, ry, rz: Rotation angles about the x, y and z axes.
        order (str): Order in which the axis rotations are composed.
    Returns:
        torch.Tensor: (4, 4) pose with the composed rotation and zero translation.
    Raises:
        ValueError: If order is not one of the six supported permutations.
    """
    Rx = rotx(rx)
    Ry = roty(ry)
    Rz = rotz(rz)
    if order == "xyz":
        R = Rz @ Ry @ Rx
    elif order == "xzy":
        R = Ry @ Rz @ Rx
    elif order == "yxz":
        R = Rz @ Rx @ Ry
    elif order == "yzx":
        R = Rx @ Rz @ Ry
    elif order == "zyx":
        R = Rx @ Ry @ Rz
    elif order == "zxy":
        R = Ry @ Rx @ Rz
    else:
        # BUG FIX: an unknown order previously fell through and raised a
        # confusing NameError on R; fail fast with a clear message instead.
        raise ValueError(f"Unsupported rotation order: {order!r}")
    return make_4x4_pose(R, torch.zeros(3))
+
def make_4x4_pose(R, t):
    """
    Assemble a homogeneous transform from rotation and translation.
    :param R (*, 3, 3)
    :param t (*, 3)
    return (*, 4, 4)
    """
    batch_dims = R.shape[:-2]
    # Top 3x4 block: [R | t].
    top = torch.cat([R, t.view(*batch_dims, 3, 1)], dim=-1)
    # Bottom row [0, 0, 0, 1], broadcast over all batch dimensions.
    bottom_row = torch.tensor([0, 0, 0, 1], device=R.device)
    bottom_row = bottom_row.reshape(*(1,) * len(batch_dims), 1, 4).expand(*batch_dims, 1, 4)
    return torch.cat([top, bottom_row], dim=-2)
+
+
def rotx(theta):
    """3x3 float32 rotation matrix about the x-axis by theta radians."""
    c = np.cos(theta)
    s = np.sin(theta)
    return torch.tensor(
        [[1, 0, 0],
         [0, c, -s],
         [0, s, c]],
        dtype=torch.float32,
    )
+
+
def roty(theta):
    """3x3 float32 rotation matrix about the y-axis by theta radians."""
    c = np.cos(theta)
    s = np.sin(theta)
    return torch.tensor(
        [[c, 0, s],
         [0, 1, 0],
         [-s, 0, c]],
        dtype=torch.float32,
    )
+
+
def rotz(theta):
    """3x3 float32 rotation matrix about the z-axis by theta radians."""
    c = np.cos(theta)
    s = np.sin(theta)
    return torch.tensor(
        [[c, -s, 0],
         [s, c, 0],
         [0, 0, 1]],
        dtype=torch.float32,
    )
+
+
def create_raymond_lights() -> List[pyrender.Node]:
    """
    Return raymond light nodes for the scene.
    """
    # Three directional lights at polar angle pi/6 from +z, 120 degrees apart in azimuth.
    thetas = np.pi * np.array([1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0])
    phis = np.pi * np.array([0.0, 2.0 / 3.0, 4.0 / 3.0])

    nodes = []
    for phi, theta in zip(phis, thetas):
        # Light direction from spherical angles.
        direction = np.array([
            np.sin(theta) * np.cos(phi),
            np.sin(theta) * np.sin(phi),
            np.cos(theta),
        ])
        direction = direction / np.linalg.norm(direction)

        # Build an orthonormal frame (right, up, direction) for the light pose.
        right = np.array([-direction[1], direction[0], 0.0])
        if np.linalg.norm(right) == 0:
            right = np.array([1.0, 0.0, 0.0])
        right = right / np.linalg.norm(right)
        up = np.cross(direction, right)

        pose = np.eye(4)
        pose[:3, :3] = np.c_[right, up, direction]
        nodes.append(pyrender.Node(
            light=pyrender.DirectionalLight(color=np.ones(3), intensity=1.0),
            matrix=pose,
        ))

    return nodes
+
class Renderer:
    """pyrender-based offscreen renderer for hand meshes (crop, full-frame and RGBA variants)."""

    def __init__(self, cfg: CfgNode, faces: np.array):
        """
        Wrapper around the pyrender renderer to render MANO meshes.
        Args:
            cfg (CfgNode): Model config file.
            faces (np.array): Array of shape (F, 3) containing the mesh faces.
        """
        self.cfg = cfg
        self.focal_length = cfg.EXTRA.FOCAL_LENGTH
        self.img_res = cfg.MODEL.IMAGE_SIZE

        # add faces that make the hand mesh watertight
        faces_new = np.array([[92, 38, 234],
                              [234, 38, 239],
                              [38, 122, 239],
                              [239, 122, 279],
                              [122, 118, 279],
                              [279, 118, 215],
                              [118, 117, 215],
                              [215, 117, 214],
                              [117, 119, 214],
                              [214, 119, 121],
                              [119, 120, 121],
                              [121, 120, 78],
                              [120, 108, 78],
                              [78, 108, 79]])
        faces = np.concatenate([faces, faces_new], axis=0)

        self.camera_center = [self.img_res // 2, self.img_res // 2]
        self.faces = faces
        # Mirrored winding order, used to render left hands with correct normals.
        self.faces_left = self.faces[:,[0,2,1]]

    def __call__(self,
                 vertices: np.array,
                 camera_translation: np.array,
                 image: torch.Tensor,
                 full_frame: bool = False,
                 imgname: Optional[str] = None,
                 side_view=False, rot_angle=90,
                 mesh_base_color=(1.0, 1.0, 0.9),
                 scene_bg_color=(0,0,0),
                 return_rgba=False,
                 ) -> np.array:
        """
        Render meshes on input image
        Args:
            vertices (np.array): Array of shape (V, 3) containing the mesh vertices.
            camera_translation (np.array): Array of shape (3,) with the camera translation.
                NOTE(review): mutated in place (x is negated below) — pass a copy if reused.
            image (torch.Tensor): Tensor of shape (3, H, W) containing the image crop with normalized pixel values.
            full_frame (bool): If True, then render on the full image.
            imgname (Optional[str]): Contains the original image filename. Used only if full_frame == True.
            side_view (bool): If True, render the mesh alone rotated by rot_angle about the y-axis.
            rot_angle (float): Side-view rotation angle in degrees.
            mesh_base_color (tuple): RGB base color of the mesh material.
            scene_bg_color (tuple): RGB background color of the scene.
            return_rgba (bool): If True, return the raw RGBA render without compositing.
        """

        if full_frame:
            # Load the original image from disk; BGR -> RGB, scaled to [0, 1].
            image = cv2.imread(imgname).astype(np.float32)[:, :, ::-1] / 255.
        else:
            # Undo the normalization applied to the crop and go to (H, W, 3).
            image = image.clone() * torch.tensor(self.cfg.MODEL.IMAGE_STD, device=image.device).reshape(3,1,1)
            image = image + torch.tensor(self.cfg.MODEL.IMAGE_MEAN, device=image.device).reshape(3,1,1)
            image = image.permute(1, 2, 0).cpu().numpy()

        renderer = pyrender.OffscreenRenderer(viewport_width=image.shape[1],
                                              viewport_height=image.shape[0],
                                              point_size=1.0)
        material = pyrender.MetallicRoughnessMaterial(
            metallicFactor=0.0,
            alphaMode='OPAQUE',
            baseColorFactor=(*mesh_base_color, 1.0))

        # Flip x to account for the renderer's camera convention.
        camera_translation[0] *= -1.

        mesh = trimesh.Trimesh(vertices.copy(), self.faces.copy())
        if side_view:
            rot = trimesh.transformations.rotation_matrix(
                np.radians(rot_angle), [0, 1, 0])
            mesh.apply_transform(rot)
        # 180-degree flip about x to match the rendering camera orientation.
        rot = trimesh.transformations.rotation_matrix(
            np.radians(180), [1, 0, 0])
        mesh.apply_transform(rot)
        mesh = pyrender.Mesh.from_trimesh(mesh, material=material)

        scene = pyrender.Scene(bg_color=[*scene_bg_color, 0.0],
                               ambient_light=(0.3, 0.3, 0.3))
        scene.add(mesh, 'mesh')

        camera_pose = np.eye(4)
        camera_pose[:3, 3] = camera_translation
        camera_center = [image.shape[1] / 2., image.shape[0] / 2.]
        camera = pyrender.IntrinsicsCamera(fx=self.focal_length, fy=self.focal_length,
                                           cx=camera_center[0], cy=camera_center[1], zfar=1e12)
        scene.add(camera, pose=camera_pose)


        light_nodes = create_raymond_lights()
        for node in light_nodes:
            scene.add_node(node)

        color, rend_depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
        color = color.astype(np.float32) / 255.0
        # Free the offscreen GL context.
        renderer.delete()

        if return_rgba:
            return color

        # Alpha channel of the render acts as the compositing mask.
        valid_mask = (color[:, :, -1])[:, :, np.newaxis]
        if not side_view:
            output_img = (color[:, :, :3] * valid_mask + (1 - valid_mask) * image)
        else:
            output_img = color[:, :, :3]

        output_img = output_img.astype(np.float32)
        return output_img

    def vertices_to_trimesh(self, vertices, camera_translation, mesh_base_color=(1.0, 1.0, 0.9),
                            rot_axis=[1,0,0], rot_angle=0, is_right=1):
        """Build a vertex-colored trimesh at vertices + camera_translation; mirrored faces for left hands."""
        # material = pyrender.MetallicRoughnessMaterial(
        #     metallicFactor=0.0,
        #     alphaMode='OPAQUE',
        #     baseColorFactor=(*mesh_base_color, 1.0))
        vertex_colors = np.array([(*mesh_base_color, 1.0)] * vertices.shape[0])
        if is_right:
            mesh = trimesh.Trimesh(vertices.copy() + camera_translation, self.faces.copy(), vertex_colors=vertex_colors)
        else:
            # Left hand: use the faces with reversed winding so normals stay outward.
            mesh = trimesh.Trimesh(vertices.copy() + camera_translation, self.faces_left.copy(), vertex_colors=vertex_colors)
        # mesh = trimesh.Trimesh(vertices.copy(), self.faces.copy())

        rot = trimesh.transformations.rotation_matrix(
            np.radians(rot_angle), rot_axis)
        mesh.apply_transform(rot)

        # 180-degree flip about x to match the rendering camera orientation.
        rot = trimesh.transformations.rotation_matrix(
            np.radians(180), [1, 0, 0])
        mesh.apply_transform(rot)
        return mesh

    def render_rgba(
            self,
            vertices: np.array,
            cam_t = None,
            rot=None,
            rot_axis=[1,0,0],
            rot_angle=0,
            camera_z=3,
            # camera_translation: np.array,
            mesh_base_color=(1.0, 1.0, 0.9),
            scene_bg_color=(0,0,0),
            render_res=[256, 256],
            focal_length=None,
            is_right=None,
        ):
        """
        Render a single mesh over a transparent background.
        Returns an (H, W, 4) float32 RGBA image in [0, 1].
        NOTE(review): is_right=None is passed through to vertices_to_trimesh,
        where `if is_right:` treats it as left-hand — confirm callers pass 0/1.
        """

        renderer = pyrender.OffscreenRenderer(viewport_width=render_res[0],
                                              viewport_height=render_res[1],
                                              point_size=1.0)
        # material = pyrender.MetallicRoughnessMaterial(
        #     metallicFactor=0.0,
        #     alphaMode='OPAQUE',
        #     baseColorFactor=(*mesh_base_color, 1.0))

        focal_length = focal_length if focal_length is not None else self.focal_length

        if cam_t is not None:
            # Copy before flipping x so the caller's array is not mutated.
            camera_translation = cam_t.copy()
            camera_translation[0] *= -1.
        else:
            # Default camera placed camera_z units away, scaled by focal length.
            camera_translation = np.array([0, 0, camera_z * focal_length/render_res[1]])

        mesh = self.vertices_to_trimesh(vertices, np.array([0, 0, 0]), mesh_base_color, rot_axis, rot_angle, is_right=is_right)
        mesh = pyrender.Mesh.from_trimesh(mesh)
        # mesh = pyrender.Mesh.from_trimesh(mesh, material=material)

        scene = pyrender.Scene(bg_color=[*scene_bg_color, 0.0],
                               ambient_light=(0.3, 0.3, 0.3))
        scene.add(mesh, 'mesh')

        camera_pose = np.eye(4)
        camera_pose[:3, 3] = camera_translation
        camera_center = [render_res[0] / 2., render_res[1] / 2.]
        camera = pyrender.IntrinsicsCamera(fx=focal_length, fy=focal_length,
                                           cx=camera_center[0], cy=camera_center[1], zfar=1e12)

        # Create camera node and add it to pyRender scene
        camera_node = pyrender.Node(camera=camera, matrix=camera_pose)
        scene.add_node(camera_node)
        self.add_point_lighting(scene, camera_node)
        self.add_lighting(scene, camera_node)

        light_nodes = create_raymond_lights()
        for node in light_nodes:
            scene.add_node(node)

        color, rend_depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
        color = color.astype(np.float32) / 255.0
        renderer.delete()

        return color

    def render_rgba_multiple(
            self,
            vertices: List[np.array],
            cam_t: List[np.array],
            rot_axis=[1,0,0],
            rot_angle=0,
            mesh_base_color=(1.0, 1.0, 0.9),
            scene_bg_color=(0,0,0),
            render_res=[256, 256],
            focal_length=None,
            is_right=None,
        ):
        """
        Render several meshes (one per entry of vertices/cam_t) into one RGBA image.
        The camera sits at the origin; each mesh carries its own translation.
        Returns an (H, W, 4) float32 RGBA image in [0, 1].
        """

        renderer = pyrender.OffscreenRenderer(viewport_width=render_res[0],
                                              viewport_height=render_res[1],
                                              point_size=1.0)
        # material = pyrender.MetallicRoughnessMaterial(
        #     metallicFactor=0.0,
        #     alphaMode='OPAQUE',
        #     baseColorFactor=(*mesh_base_color, 1.0))

        # Default: treat every hand as a right hand.
        if is_right is None:
            is_right = [1 for _ in range(len(vertices))]

        mesh_list = [pyrender.Mesh.from_trimesh(self.vertices_to_trimesh(vvv, ttt.copy(), mesh_base_color, rot_axis, rot_angle, is_right=sss)) for vvv,ttt,sss in zip(vertices, cam_t, is_right)]

        scene = pyrender.Scene(bg_color=[*scene_bg_color, 0.0],
                               ambient_light=(0.3, 0.3, 0.3))
        for i,mesh in enumerate(mesh_list):
            scene.add(mesh, f'mesh_{i}')

        camera_pose = np.eye(4)
        # camera_pose[:3, 3] = camera_translation
        camera_center = [render_res[0] / 2., render_res[1] / 2.]
        focal_length = focal_length if focal_length is not None else self.focal_length
        camera = pyrender.IntrinsicsCamera(fx=focal_length, fy=focal_length,
                                           cx=camera_center[0], cy=camera_center[1], zfar=1e12)

        # Create camera node and add it to pyRender scene
        camera_node = pyrender.Node(camera=camera, matrix=camera_pose)
        scene.add_node(camera_node)
        self.add_point_lighting(scene, camera_node)
        self.add_lighting(scene, camera_node)

        light_nodes = create_raymond_lights()
        for node in light_nodes:
            scene.add_node(node)

        color, rend_depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
        color = color.astype(np.float32) / 255.0
        renderer.delete()

        return color

    def add_lighting(self, scene, cam_node, color=np.ones(3), intensity=1.0):
        """Add directional lights arranged around the camera pose."""
        # from phalp.visualize.py_renderer import get_light_poses
        light_poses = get_light_poses()
        light_poses.append(np.eye(4))
        cam_pose = scene.get_pose(cam_node)
        for i, pose in enumerate(light_poses):
            # Express each light pose relative to the camera.
            matrix = cam_pose @ pose
            node = pyrender.Node(
                name=f"light-{i:02d}",
                light=pyrender.DirectionalLight(color=color, intensity=intensity),
                matrix=matrix,
            )
            if scene.has_node(node):
                continue
            scene.add_node(node)

    def add_point_lighting(self, scene, cam_node, color=np.ones(3), intensity=1.0):
        """Add point lights close to the camera (dist=0.5) around its pose."""
        # from phalp.visualize.py_renderer import get_light_poses
        light_poses = get_light_poses(dist=0.5)
        light_poses.append(np.eye(4))
        cam_pose = scene.get_pose(cam_node)
        for i, pose in enumerate(light_poses):
            # Express each light pose relative to the camera.
            matrix = cam_pose @ pose
            # node = pyrender.Node(
            #     name=f"light-{i:02d}",
            #     light=pyrender.DirectionalLight(color=color, intensity=intensity),
            #     matrix=matrix,
            # )
            node = pyrender.Node(
                name=f"plight-{i:02d}",
                light=pyrender.PointLight(color=color, intensity=intensity),
                matrix=matrix,
            )
            if scene.has_node(node):
                continue
            scene.add_node(node)
diff --git a/WiLoR/wilor/utils/rich_utils.py b/WiLoR/wilor/utils/rich_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6918c2abe1daecc2f80d8a04e836507b1d4f5f7c
--- /dev/null
+++ b/WiLoR/wilor/utils/rich_utils.py
@@ -0,0 +1,105 @@
+from pathlib import Path
+from typing import Sequence
+
+import rich
+import rich.syntax
+import rich.tree
+from hydra.core.hydra_config import HydraConfig
+from omegaconf import DictConfig, OmegaConf, open_dict
+from pytorch_lightning.utilities import rank_zero_only
+from rich.prompt import Prompt
+
+from . import pylogger
+
+log = pylogger.get_pylogger(__name__)
+
+
@rank_zero_only
def print_config_tree(
    cfg: DictConfig,
    print_order: Sequence[str] = (
        "datamodule",
        "model",
        "callbacks",
        "logger",
        "trainer",
        "paths",
        "extras",
    ),
    resolve: bool = False,
    save_to_file: bool = False,
) -> None:
    """Prints content of DictConfig using Rich library and its tree structure.

    Args:
        cfg (DictConfig): Configuration composed by Hydra.
        print_order (Sequence[str], optional): Determines in what order config components are printed.
        resolve (bool, optional): Whether to resolve reference fields of DictConfig.
        save_to_file (bool, optional): Whether to export config to the hydra output folder.
    """

    style = "dim"
    tree = rich.tree.Tree("CONFIG", style=style, guide_style=style)

    queue = []

    # add fields from `print_order` to queue
    # (rewritten from a side-effecting conditional expression into a plain
    # if/else for clarity; behavior is unchanged)
    for field in print_order:
        if field in cfg:
            queue.append(field)
        else:
            log.warning(
                f"Field '{field}' not found in config. Skipping '{field}' config printing..."
            )

    # add all the other fields to queue (not specified in `print_order`)
    for field in cfg:
        if field not in queue:
            queue.append(field)

    # generate config tree from queue
    for field in queue:
        branch = tree.add(field, style=style, guide_style=style)

        config_group = cfg[field]
        if isinstance(config_group, DictConfig):
            branch_content = OmegaConf.to_yaml(config_group, resolve=resolve)
        else:
            branch_content = str(config_group)

        branch.add(rich.syntax.Syntax(branch_content, "yaml"))

    # print config tree
    rich.print(tree)

    # save config tree to file
    if save_to_file:
        with open(Path(cfg.paths.output_dir, "config_tree.log"), "w") as file:
            rich.print(tree, file=file)
+
+
@rank_zero_only
def enforce_tags(cfg: DictConfig, save_to_file: bool = False) -> None:
    """Prompts user to input tags from command line if no tags are provided in config."""

    if not cfg.get("tags"):
        # Multirun jobs (which carry an 'id') have no interactive terminal,
        # so missing tags are a hard error rather than a prompt.
        if "id" in HydraConfig().cfg.hydra.job:
            raise ValueError("Specify tags before launching a multirun!")

        log.warning("No tags provided in config. Prompting user to input tags...")
        tags = Prompt.ask("Enter a list of comma separated tags", default="dev")
        # Split on commas and trim whitespace, dropping empty entries.
        tags = [t.strip() for t in tags.split(",") if t != ""]

        # open_dict temporarily unlocks the (possibly struct) config for writing.
        with open_dict(cfg):
            cfg.tags = tags

        log.info(f"Tags: {cfg.tags}")

    if save_to_file:
        with open(Path(cfg.paths.output_dir, "tags.log"), "w") as file:
            rich.print(cfg.tags, file=file)
+
+
if __name__ == "__main__":
    from hydra import compose, initialize

    # Ad-hoc smoke test: compose the training config with Hydra and print its tree.
    with initialize(version_base="1.2", config_path="../../configs"):
        cfg = compose(config_name="train.yaml", return_hydra_config=False, overrides=[])
        print_config_tree(cfg, resolve=False, save_to_file=False)
diff --git a/WiLoR/wilor/utils/skeleton_renderer.py b/WiLoR/wilor/utils/skeleton_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc2563f5b0b36d704922f4ad488392e9835c9b3e
--- /dev/null
+++ b/WiLoR/wilor/utils/skeleton_renderer.py
@@ -0,0 +1,124 @@
+import torch
+import numpy as np
+import trimesh
+from typing import Optional
+from yacs.config import CfgNode
+
+from .geometry import perspective_projection
+from .render_openpose import render_openpose
+
+class SkeletonRenderer:
+
+ def __init__(self, cfg: CfgNode):
+ """
+ Object used to render 3D keypoints. Faster for use during training.
+ Args:
+ cfg (CfgNode): Model config file.
+ """
+ self.cfg = cfg
+
+ def __call__(self,
+ pred_keypoints_3d: torch.Tensor,
+ gt_keypoints_3d: torch.Tensor,
+ gt_keypoints_2d: torch.Tensor,
+ images: Optional[np.ndarray] = None,
+ camera_translation: Optional[torch.Tensor] = None) -> np.ndarray:
+ """
+ Render batch of 3D keypoints.
+ Args:
+ pred_keypoints_3d (torch.Tensor): Tensor of shape (B, S, N, 3) containing a batch of predicted 3D keypoints, with S samples per image.
+ gt_keypoints_3d (torch.Tensor): Tensor of shape (B, N, 4) containing corresponding ground truth 3D keypoints; last value is the confidence.
+ gt_keypoints_2d (torch.Tensor): Tensor of shape (B, N, 3) containing corresponding ground truth 2D keypoints.
+ images (np.ndarray): Array of shape (B, H, W, 3) containing images with values in the [0,255] range.
+ camera_translation (torch.Tensor): Tensor of shape (B, 3) containing the camera translation.
+ Returns:
+ np.ndarray : Image with the following layout. Each row contains a) the input image,
+ b) image with gt 2D keypoints,
+ c) image with projected gt 3D keypoints,
+ d_1, ... , d_S) image with projected predicted 3D keypoints,
+ e) gt 3D keypoints rendered from a side view,
+ f_1, ... , f_S) predicted 3D keypoints from a side view
+ """
+ batch_size = pred_keypoints_3d.shape[0]
+# num_samples = pred_keypoints_3d.shape[1]
+ # Detach to CPU float copies so rendering never touches the training device/graph.
+ pred_keypoints_3d = pred_keypoints_3d.clone().cpu().float()
+ gt_keypoints_3d = gt_keypoints_3d.clone().cpu().float()
+ # Re-center gt joints (all columns but the trailing confidence) so the gt
+ # root joint (index 0) coincides with the predicted root, making the two
+ # skeletons directly comparable after projection.
+ gt_keypoints_3d[:, :, :-1] = gt_keypoints_3d[:, :, :-1] - gt_keypoints_3d[:, [0], :-1] + pred_keypoints_3d[:, [0]]
+ gt_keypoints_2d = gt_keypoints_2d.clone().cpu().float().numpy()
+ # Map normalized 2D coords from [-1, 1] to pixel coords in [0, IMAGE_SIZE].
+ gt_keypoints_2d[:, :, :-1] = self.cfg.MODEL.IMAGE_SIZE * (gt_keypoints_2d[:, :, :-1] + 1.0) / 2.0
+
+ #openpose_indices = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+ #gt_indices = [12, 8, 7, 6, 9, 10, 11, 14, 2, 1, 0, 3, 4, 5]
+ #gt_indices = [25 + i for i in gt_indices]
+ # Hand skeleton: 21 keypoints whose gt order already matches the OpenPose
+ # order, so the mapping below is the identity (the commented lines above
+ # are the old body-pose mapping this was adapted from).
+ openpose_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+ gt_indices = openpose_indices
+ # Predicted keypoints have no confidences; render them all with confidence 1.
+ keypoints_to_render = torch.ones(batch_size, gt_keypoints_3d.shape[1], 1)
+ rotation = torch.eye(3).unsqueeze(0)
+ if camera_translation is None:
+ # Default camera depth derived from focal length and image size so the
+ # skeleton roughly fills the crop — presumably tuned; TODO confirm.
+ camera_translation = torch.tensor([0.0, 0.0, 2 * self.cfg.EXTRA.FOCAL_LENGTH / (0.8 * self.cfg.MODEL.IMAGE_SIZE)]).unsqueeze(0).repeat(batch_size, 1)
+ else:
+ camera_translation = camera_translation.cpu()
+
+ if images is None:
+ # No input images supplied: draw skeletons on black canvases instead.
+ images = np.zeros((batch_size, self.cfg.MODEL.IMAGE_SIZE, self.cfg.MODEL.IMAGE_SIZE, 3))
+ focal_length = torch.tensor([self.cfg.EXTRA.FOCAL_LENGTH, self.cfg.EXTRA.FOCAL_LENGTH]).reshape(1, 2)
+ camera_center = torch.tensor([self.cfg.MODEL.IMAGE_SIZE, self.cfg.MODEL.IMAGE_SIZE], dtype=torch.float).reshape(1, 2) / 2.
+ # Frontal-view projection of gt and predicted 3D joints onto the image plane;
+ # confidences are re-appended as the last column after projecting.
+ gt_keypoints_3d_proj = perspective_projection(gt_keypoints_3d[:, :, :-1], rotation=rotation.repeat(batch_size, 1, 1), translation=camera_translation[:, :], focal_length=focal_length.repeat(batch_size, 1), camera_center=camera_center.repeat(batch_size, 1))
+ pred_keypoints_3d_proj = perspective_projection(pred_keypoints_3d.reshape(batch_size, -1, 3), rotation=rotation.repeat(batch_size, 1, 1), translation=camera_translation.reshape(batch_size, -1), focal_length=focal_length.repeat(batch_size, 1), camera_center=camera_center.repeat(batch_size, 1)).reshape(batch_size, -1, 2)
+ gt_keypoints_3d_proj = torch.cat([gt_keypoints_3d_proj, gt_keypoints_3d[:, :, [-1]]], dim=-1).cpu().numpy()
+ pred_keypoints_3d_proj = torch.cat([pred_keypoints_3d_proj, keypoints_to_render.reshape(batch_size, -1, 1)], dim=-1).cpu().numpy()
+ rows = []
+ # Rotate keypoints to visualize side view (90 degrees about the y axis),
+ # then project the rotated joints with the same camera.
+ R = torch.tensor(trimesh.transformations.rotation_matrix(np.radians(90), [0, 1, 0])[:3, :3]).float()
+ gt_keypoints_3d_side = gt_keypoints_3d.clone()
+ gt_keypoints_3d_side[:, :, :-1] = torch.einsum('bni,ij->bnj', gt_keypoints_3d_side[:, :, :-1], R)
+ pred_keypoints_3d_side = pred_keypoints_3d.clone()
+ pred_keypoints_3d_side = torch.einsum('bni,ij->bnj', pred_keypoints_3d_side, R)
+ gt_keypoints_3d_proj_side = perspective_projection(gt_keypoints_3d_side[:, :, :-1], rotation=rotation.repeat(batch_size, 1, 1), translation=camera_translation[:, :], focal_length=focal_length.repeat(batch_size, 1), camera_center=camera_center.repeat(batch_size, 1))
+ pred_keypoints_3d_proj_side = perspective_projection(pred_keypoints_3d_side.reshape(batch_size, -1, 3), rotation=rotation.repeat(batch_size, 1, 1), translation=camera_translation.reshape(batch_size, -1), focal_length=focal_length.repeat(batch_size, 1), camera_center=camera_center.repeat(batch_size, 1)).reshape(batch_size, -1, 2)
+ gt_keypoints_3d_proj_side = torch.cat([gt_keypoints_3d_proj_side, gt_keypoints_3d_side[:, :, [-1]]], dim=-1).cpu().numpy()
+ pred_keypoints_3d_proj_side = torch.cat([pred_keypoints_3d_proj_side, keypoints_to_render.reshape(batch_size, -1, 1)], dim=-1).cpu().numpy()
+ # Assemble one row of side-by-side panels per batch element.
+ # NOTE(review): only the first 21 joints of the flattened (S*N) prediction
+ # are rendered below, so S > 1 samples collapse to a single panel — confirm
+ # whether multi-sample rendering is still intended.
+ for i in range(batch_size):
+ img = images[i]
+ side_img = np.zeros((self.cfg.MODEL.IMAGE_SIZE, self.cfg.MODEL.IMAGE_SIZE, 3))
+ # gt 2D keypoints
+ # The zip merge keeps whichever keypoint has higher confidence; with the
+ # identity mapping above it is effectively a no-op kept from the body version.
+ body_keypoints_2d = gt_keypoints_2d[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if gt_keypoints_2d[i, gt, -1] > body_keypoints_2d[op, -1]:
+ body_keypoints_2d[op] = gt_keypoints_2d[i, gt]
+ gt_keypoints_img = render_openpose(img, body_keypoints_2d) / 255.
+ # gt 3D keypoints
+ body_keypoints_3d_proj = gt_keypoints_3d_proj[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if gt_keypoints_3d_proj[i, gt, -1] > body_keypoints_3d_proj[op, -1]:
+ body_keypoints_3d_proj[op] = gt_keypoints_3d_proj[i, gt]
+ gt_keypoints_3d_proj_img = render_openpose(img, body_keypoints_3d_proj) / 255.
+ # gt 3D keypoints from the side
+ body_keypoints_3d_proj = gt_keypoints_3d_proj_side[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if gt_keypoints_3d_proj_side[i, gt, -1] > body_keypoints_3d_proj[op, -1]:
+ body_keypoints_3d_proj[op] = gt_keypoints_3d_proj_side[i, gt]
+ gt_keypoints_3d_proj_img_side = render_openpose(side_img, body_keypoints_3d_proj) / 255.
+ # pred 3D keypoints
+ pred_keypoints_3d_proj_imgs = []
+ body_keypoints_3d_proj = pred_keypoints_3d_proj[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if pred_keypoints_3d_proj[i, gt, -1] >= body_keypoints_3d_proj[op, -1]:
+ body_keypoints_3d_proj[op] = pred_keypoints_3d_proj[i, gt]
+ pred_keypoints_3d_proj_imgs.append(render_openpose(img, body_keypoints_3d_proj) / 255.)
+ pred_keypoints_3d_proj_img = np.concatenate(pred_keypoints_3d_proj_imgs, axis=1)
+ # pred 3D keypoints from the side
+ pred_keypoints_3d_proj_imgs_side = []
+ body_keypoints_3d_proj = pred_keypoints_3d_proj_side[i, :21].copy()
+ for op, gt in zip(openpose_indices, gt_indices):
+ if pred_keypoints_3d_proj_side[i, gt, -1] >= body_keypoints_3d_proj[op, -1]:
+ body_keypoints_3d_proj[op] = pred_keypoints_3d_proj_side[i, gt]
+ pred_keypoints_3d_proj_imgs_side.append(render_openpose(side_img, body_keypoints_3d_proj) / 255.)
+ pred_keypoints_3d_proj_img_side = np.concatenate(pred_keypoints_3d_proj_imgs_side, axis=1)
+ rows.append(np.concatenate((gt_keypoints_img, gt_keypoints_3d_proj_img, pred_keypoints_3d_proj_img, gt_keypoints_3d_proj_img_side, pred_keypoints_3d_proj_img_side), axis=1))
+ # Concatenate images
+ img = np.concatenate(rows, axis=0)
+ # Draw white 1-px grid lines at every panel boundary, then a single gray
+ # line after the third panel to separate frontal panels from the side views.
+ # NOTE(review): the gray line indexes one exact column (3 * IMAGE_SIZE),
+ # which assumes the row is wider than three panels — confirm for S = 1 layouts.
+ img[:, ::self.cfg.MODEL.IMAGE_SIZE, :] = 1.0
+ img[::self.cfg.MODEL.IMAGE_SIZE, :, :] = 1.0
+ img[:, (1+1+1)*self.cfg.MODEL.IMAGE_SIZE, :] = 0.5
+ return img