import os
import cv2
import time
import json
import torch
import subprocess
import numpy as np
import os.path as osp

from collections import OrderedDict
from pytube import YouTube

from utils.smooth_bbox import get_smooth_bbox_params, get_all_bbox_params
from datasets.data_utils.img_utils import get_single_image_crop_demo
from utils.geometry import rotation_matrix_to_angle_axis
# NOTE: the import path below is an assumption; point it at wherever the
# TemporalSMPLify implementation lives in this project.
from models.smplify import TemporalSMPLify


def preprocess_video(video, joints2d, bboxes, frames, scale=1.0, crop_size=224):
    """
    Read a video, normalize it, and crop it according to the bounding boxes.
    If bounding box annotations are provided, use them to crop the frames.
    If no bounding box is given but openpose detections are available, use them to derive the bounding boxes.

    :param video (ndarray): input video
    :param joints2d (ndarray, NxJx3): openpose detections
    :param bboxes (ndarray, Nx5): bbox detections
    :param frames (ndarray): frame indices of the input video
    :param scale (float): bbox crop scaling factor
    :param crop_size (int): crop width and height
    :return: cropped video, cropped and normalized video, modified bboxes, modified joints2d, modified frames
    """
    if joints2d is not None:
        # Derive per-frame bbox params from the 2D keypoints and trim the sequence
        # to the temporal window in which the person is visible.
        bboxes, time_pt1, time_pt2 = get_all_bbox_params(joints2d, vis_thresh=0.3)
        bboxes[:, 2:] = 150. / bboxes[:, 2:]
        bboxes = np.stack([bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 2]]).T

        video = video[time_pt1:time_pt2]
        joints2d = joints2d[time_pt1:time_pt2]
        frames = frames[time_pt1:time_pt2]

    shape = video.shape

    temp_video = np.zeros((shape[0], crop_size, crop_size, shape[-1]))
    norm_video = torch.zeros(shape[0], shape[-1], crop_size, crop_size)

    for idx in range(video.shape[0]):
        img = video[idx]
        bbox = bboxes[idx]

        j2d = joints2d[idx] if joints2d is not None else None

        norm_img, raw_img, kp_2d = get_single_image_crop_demo(
            img, bbox, kp_2d=j2d, scale=scale, crop_size=crop_size
        )

        if joints2d is not None:
            joints2d[idx] = kp_2d

        temp_video[idx] = raw_img
        norm_video[idx] = norm_img

    temp_video = temp_video.astype(np.uint8)

    return temp_video, norm_video, bboxes, joints2d, frames

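# Example usage (illustrative sketch; `video_frames` and `openpose_j2d` are assumed
# inputs, e.g. an (N, H, W, 3) uint8 array and (N, J, 3) openpose keypoints):
#
#     frames = np.arange(len(video_frames))
#     raw_crops, norm_crops, bboxes, joints2d, frames = preprocess_video(
#         video_frames, joints2d=openpose_j2d, bboxes=None, frames=frames,
#         scale=1.0, crop_size=224,
#     )
#     # `norm_crops` is an (N, 3, 224, 224) tensor ready to feed to the pose network.

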
def download_youtube_clip(url, download_folder):
    # Download the first available stream of the video into `download_folder`
    # and return the path of the saved file.
    return YouTube(url).streams.first().download(output_path=download_folder)


def smplify_runner(
    pred_rotmat,
    pred_betas,
    pred_cam,
    j2d,
    device,
    batch_size,
    lr=1.0,
    opt_steps=1,
    use_lbfgs=True,
    pose2aa=True
):
    smplify = TemporalSMPLify(
        step_size=lr,
        batch_size=batch_size,
        num_iters=opt_steps,
        focal_length=5000.,
        use_lbfgs=use_lbfgs,
        device=device,
    )

    # Convert predicted rotation matrices to axis-angle if requested.
    if pose2aa:
        pred_pose = rotation_matrix_to_angle_axis(pred_rotmat.detach()).reshape(batch_size, -1)
    else:
        pred_pose = pred_rotmat

    # Convert the weak-perspective camera (s, tx, ty) to a camera translation,
    # assuming a focal length of 5000 px and a 224x224 crop.
    pred_cam_t = torch.stack(
        [pred_cam[:, 1], pred_cam[:, 2], 2 * 5000 / (224 * pred_cam[:, 0] + 1e-9)], dim=-1
    )

    gt_keypoints_2d_orig = j2d

    # Before fitting, compute the 2D reprojection error of the network predictions.
    opt_joint_loss = smplify.get_fitting_loss(
        pred_pose.detach(), pred_betas.detach(), pred_cam_t.detach(),
        0.5 * 224 * torch.ones(batch_size, 2, device=device), gt_keypoints_2d_orig
    ).mean(dim=-1)

    # Initialize the fit with the betas of the frame that has the lowest reprojection error.
    best_prediction_id = torch.argmin(opt_joint_loss).item()
    pred_betas = pred_betas[best_prediction_id].unsqueeze(0)

    start = time.time()  # timing start, kept for optional profiling of the fitting step

    # Run SMPLify optimization initialized from the network prediction.
    output, new_opt_joint_loss = smplify(
        pred_pose.detach(),
        pred_betas.detach(),
        pred_cam_t.detach(),
        0.5 * 224 * torch.ones(batch_size, 2, device=device),
        gt_keypoints_2d_orig,
    )
    new_opt_joint_loss = new_opt_joint_loss.mean(dim=-1)

    # Mark the frames where the fitted result has a lower reprojection loss
    # than the original network prediction.
    update = (new_opt_joint_loss < opt_joint_loss)

    new_opt_vertices = output['verts']
    new_opt_cam_t = output['theta'][:, :3]
    new_opt_pose = output['theta'][:, 3:75]
    new_opt_betas = output['theta'][:, 75:]
    new_opt_joints3d = output['kp_3d']

    return_val = [
        update,
        new_opt_vertices.cpu(),
        new_opt_cam_t.cpu(),
        new_opt_pose.cpu(),
        new_opt_betas.cpu(),
        new_opt_joints3d.cpu(),
        new_opt_joint_loss,
        opt_joint_loss,
    ]

    return return_val

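# Example usage (illustrative sketch; the tensors below stand in for network outputs
# and 2D keypoints, with assumed shapes):
#
#     # pred_rotmat: (B, 24, 3, 3), pred_betas: (B, 10), pred_cam: (B, 3), j2d: (B, J, 3)
#     update, verts, cam_t, pose, betas, joints3d, new_loss, old_loss = smplify_runner(
#         pred_rotmat, pred_betas, pred_cam, j2d,
#         device='cuda', batch_size=pred_rotmat.shape[0],
#         lr=1.0, opt_steps=1, use_lbfgs=True, pose2aa=True,
#     )
#     # `update` marks the frames where SMPLify improved on the network prediction.

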
def trim_videos(filename, start_time, end_time, output_filename):
    # Cut [start_time, end_time] out of `filename` and re-encode it to `output_filename`.
    # Since the command is passed to subprocess as a list, the filenames must not carry
    # extra shell quoting, so they are passed as-is.
    command = [
        'ffmpeg', '-i', filename,
        '-ss', str(start_time),
        '-t', str(end_time - start_time),
        '-c:v', 'libx264', '-c:a', 'copy',
        '-threads', '1', '-loglevel', 'panic',
        output_filename,
    ]

    subprocess.call(command)


def video_to_images(vid_file, img_folder=None, return_info=False):
    # Extract the frames of `vid_file` into `img_folder` as zero-padded PNGs.
    if img_folder is None:
        img_folder = osp.join(osp.expanduser('~'), 'tmp', osp.basename(vid_file).replace('.', '_'))

    print(img_folder)
    os.makedirs(img_folder, exist_ok=True)

    command = ['ffmpeg', '-i', vid_file, '-f', 'image2', '-v', 'error', f'{img_folder}/%06d.png']
    print(f'Running "{" ".join(command)}"')

    try:
        subprocess.call(command)
    except OSError:
        # Fall back to running the command through the shell if the direct call fails.
        subprocess.call(' '.join(command), shell=True)

    print(f'Images saved to "{img_folder}"')

    img_shape = cv2.imread(osp.join(img_folder, '000001.png')).shape

    if return_info:
        return img_folder, len(os.listdir(img_folder)), img_shape
    else:
        return img_folder


def download_url(url, outdir):
    print(f'Downloading files from {url}')
    cmd = ['wget', '-c', url, '-P', outdir]
    subprocess.call(cmd)


def download_ckpt(outdir='data/vibe_data', use_3dpw=False):
    # Download the pretrained checkpoint (trained with or without 3DPW) into `outdir`
    # if it is not already present, and return its path.
    os.makedirs(outdir, exist_ok=True)

    if use_3dpw:
        ckpt_file = osp.join(outdir, 'vibe_model_w_3dpw.pth.tar')
        url = 'https://www.dropbox.com/s/41ozgqorcp095ja/vibe_model_w_3dpw.pth.tar'
    else:
        ckpt_file = osp.join(outdir, 'vibe_model_wo_3dpw.pth.tar')
        url = 'https://www.dropbox.com/s/amj2p8bmf6g56k6/vibe_model_wo_3dpw.pth.tar'

    if not os.path.isfile(ckpt_file):
        download_url(url=url, outdir=outdir)

    return ckpt_file


def images_to_video(img_folder, output_vid_file):
    # Encode the zero-padded PNG frames in `img_folder` into an H.264 video.
    os.makedirs(img_folder, exist_ok=True)

    command = [
        'ffmpeg', '-y', '-threads', '16',
        '-i', f'{img_folder}/%06d.png',
        '-profile:v', 'baseline', '-level', '3.0',
        '-c:v', 'libx264', '-pix_fmt', 'yuv420p',
        '-an', '-v', 'error',
        output_vid_file,
    ]

    print(f'Running "{" ".join(command)}"')
    try:
        subprocess.call(command)
    except OSError:
        # Fall back to running the command through the shell if the direct call fails.
        subprocess.call(' '.join(command), shell=True)


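# Example round trip (illustrative; 'demo.mp4' is an assumed input path):
#
#     img_folder, num_frames, img_shape = video_to_images('demo.mp4', return_info=True)
#     # ... render onto or otherwise modify the extracted PNG frames in `img_folder` ...
#     images_to_video(img_folder, 'demo_rendered.mp4')

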
def convert_crop_cam_to_orig_img(cam, bbox, img_width, img_height):
    '''
    Convert a predicted weak-perspective camera from cropped image coordinates
    to original image coordinates.
    :param cam (ndarray, shape=(N,3)): weak perspective camera (s, tx, ty) in cropped img coordinates
    :param bbox (ndarray, shape=(N,4)): bbox coordinates (c_x, c_y, h, w); only c_x, c_y, h are used
    :param img_width (int): original image width
    :param img_height (int): original image height
    :return: orig_cam (ndarray, shape=(N,4)): (sx, sy, tx, ty) in original image coordinates
    '''
    cx, cy, h = bbox[:, 0], bbox[:, 1], bbox[:, 2]
    hw, hh = img_width / 2., img_height / 2.
    # Rescale the weak-perspective scale from the crop to the full image ...
    sx = cam[:, 0] * (1. / (img_width / h))
    sy = cam[:, 0] * (1. / (img_height / h))
    # ... and shift the translation so it is expressed relative to the image center.
    tx = ((cx - hw) / hw / sx) + cam[:, 1]
    ty = ((cy - hh) / hh / sy) + cam[:, 2]
    orig_cam = np.stack([sx, sy, tx, ty]).T
    return orig_cam

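# Worked example (illustrative numbers): for a 1920x1080 image and a square crop of
# height h=300 centered at (c_x, c_y)=(960, 540), a crop-space camera (s, tx, ty)=(1, 0, 0)
# maps to sx = 1 * 300/1920 = 0.15625, sy = 1 * 300/1080 ≈ 0.2778, and tx = ty = 0,
# because the crop center coincides with the image center.

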
def prepare_rendering_results(results_dict, nframes):
    # Re-index per-person tracking results into a per-frame structure for the renderer.
    frame_results = [{} for _ in range(nframes)]
    for person_id, person_data in results_dict.items():
        for idx, frame_id in enumerate(person_data['frame_ids']):
            frame_results[frame_id][person_id] = {
                'verts': person_data['verts'][idx],
                'smplx_verts': person_data['smplx_verts'][idx] if 'smplx_verts' in person_data else None,
                'cam': person_data['orig_cam'][idx],
                'cam_t': person_data['orig_cam_t'][idx] if 'orig_cam_t' in person_data else None,
            }

    # Naive depth ordering based on the scale of the weak-perspective camera.
    for frame_id, frame_data in enumerate(frame_results):
        sort_idx = np.argsort([v['cam'][1] for k, v in frame_data.items()])
        frame_results[frame_id] = OrderedDict(
            {list(frame_data.keys())[i]: frame_data[list(frame_data.keys())[i]]
             for i in sort_idx}
        )

    return frame_results

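# Example (illustrative; `vibe_results` is an assumed dict keyed by person id, each entry
# holding aligned arrays such as 'frame_ids', 'verts' and 'orig_cam'):
#
#     frame_results = prepare_rendering_results(vibe_results, nframes=num_frames)
#     for person_id, person_data in frame_results[0].items():
#         render(person_data['verts'], person_data['cam'])  # `render` is a hypothetical helper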