Spaces:
Sleeping
Sleeping
| import os | |
| import os.path as osp | |
| import cv2 | |
| import torch | |
| import imageio | |
| import numpy as np | |
| from progress.bar import Bar | |
| from lib.vis.renderer import Renderer, get_global_cameras | |
def run_vis_on_demo(cfg, video, results, output_pth, smpl, vis_global=True):
    """Render tracked SMPL meshes from `results` onto the frames of `video`.

    Writes `output.mp4` into `output_pth`. When `vis_global` is True, a
    second panel rendered in global (world) coordinates is concatenated to
    the right of each frame, for the subject that appears in the most frames.

    Args:
        cfg: config object; only `cfg.DEVICE` is read here.
        video: path to the input video file readable by OpenCV.
        results: dict keyed by subject id; each value holds per-frame arrays
            `frame_ids`, `verts`, and world-frame `pose_world`, `trans_world`,
            `betas` (numpy arrays — TODO confirm exact shapes against caller).
        output_pth: directory the output video is written into.
        smpl: body model exposing `get_output(...)` and `faces`.
        vis_global: also render the global-coordinate view (default True).
    """
    # Shorthand: numpy array -> float tensor on the configured device.
    tt = lambda x: torch.from_numpy(x).float().to(cfg.DEVICE)

    cap = cv2.VideoCapture(video)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width, height = cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

        # CLIFF-style focal length heuristic: the image diagonal in pixels.
        focal_length = (width ** 2 + height ** 2) ** 0.5
        renderer = Renderer(width, height, focal_length, cfg.DEVICE, smpl.faces)

        if vis_global:
            # Current implementation only visualizes the subject that
            # appears in the most frames.
            n_frames = {k: len(results[k]['frame_ids']) for k in results.keys()}
            sid = max(n_frames, key=n_frames.get)
            global_output = smpl.get_output(
                body_pose=tt(results[sid]['pose_world'][:, 3:]),
                global_orient=tt(results[sid]['pose_world'][:, :3]),
                betas=tt(results[sid]['betas']),
                transl=tt(results[sid]['trans_world']))
            verts_glob = global_output.vertices.cpu()
            # Shift the whole trajectory so its lowest point sits on y = 0,
            # i.e. the subject stands on the rendered ground plane.
            verts_glob[..., 1] = verts_glob[..., 1] - verts_glob[..., 1].min()
            cx, cz = (verts_glob.mean(1).max(0)[0] + verts_glob.mean(1).min(0)[0])[[0, 2]] / 2.0
            sx, sz = (verts_glob.mean(1).max(0)[0] - verts_glob.mean(1).min(0)[0])[[0, 2]]
            scale = max(sx.item(), sz.item()) * 1.5

            # Ground plane centered on the trajectory, sized to cover it.
            renderer.set_ground(scale, cx.item(), cz.item())

            # Per-frame world-space cameras and lights following the subject.
            global_R, global_T, global_lights = get_global_cameras(verts_glob, cfg.DEVICE)

        # Identity camera for rendering onto the original footage.
        default_R, default_T = torch.eye(3), torch.zeros(3)

        writer = imageio.get_writer(
            osp.join(output_pth, 'output.mp4'),
            fps=fps, mode='I', format='FFMPEG', macro_block_size=1
        )
        bar = Bar('Rendering results ...', fill='#', max=length)
        frame_i = 0
        # First-frame global camera, captured once (not read again below;
        # kept for parity with the original implementation).
        _global_R, _global_T = None, None
        try:
            while cap.isOpened():
                flag, org_img = cap.read()
                if not flag:
                    break
                img = org_img[..., ::-1].copy()  # BGR -> RGB

                # Overlay every tracked subject present in this frame.
                renderer.create_camera(default_R, default_T)
                for _id, val in results.items():
                    matches = np.where(val['frame_ids'] == frame_i)[0]
                    if len(matches) == 0:
                        continue
                    frame_i2 = matches[0]
                    img = renderer.render_mesh(
                        torch.from_numpy(val['verts'][frame_i2]).to(cfg.DEVICE), img)

                if vis_global and frame_i in results[sid]['frame_ids']:
                    # Render the global-coordinate panel for this frame.
                    frame_i3 = np.where(results[sid]['frame_ids'] == frame_i)[0]
                    verts = verts_glob[[frame_i3]].to(cfg.DEVICE)
                    faces = renderer.faces.clone().squeeze(0)
                    colors = torch.ones((1, 4)).float().to(cfg.DEVICE)
                    colors[..., :3] *= 0.9  # light-gray mesh, opaque alpha
                    if _global_R is None:
                        _global_R = global_R[frame_i3].clone()
                        _global_T = global_T[frame_i3].clone()
                    cameras = renderer.create_camera(global_R[frame_i3], global_T[frame_i3])
                    img_glob = renderer.render_with_ground(verts, faces, colors, cameras, global_lights)
                    try:
                        img = np.concatenate((img, img_glob), axis=1)
                    except ValueError:
                        # Panel size mismatch: pad with a white panel so the
                        # writer keeps a constant frame size. (Was a bare
                        # `except:`; narrowed to the concatenate error.)
                        img = np.concatenate((img, np.ones_like(img) * 255), axis=1)

                writer.append_data(img)
                bar.next()
                frame_i += 1
        finally:
            # Close the FFMPEG writer even if rendering fails mid-video.
            writer.close()
            bar.finish()
    finally:
        # Always release the capture handle (was leaked in the original).
        cap.release()