Mounika256 committed on
Commit
23e13f8
·
verified ·
1 Parent(s): 1845a9a

Update visualise/rendering.py

Browse files
Files changed (1) hide show
  1. visualise/rendering.py +33 -161
visualise/rendering.py CHANGED
@@ -1,97 +1,54 @@
1
  import random
2
-
3
  import cv2
4
  import os
5
-
6
  import tempfile
7
  import threading
8
  from subprocess import call
9
 
10
  import numpy as np
11
  from scipy.io import wavfile
12
- import pyrender
13
 
14
- import librosa
 
 
15
 
 
16
  from tqdm import tqdm
17
 
18
- # import open3d as o3d
19
  from data_utils.utils import load_wav_old
20
  from voca.rendering import render_mesh_helper
21
 
22
-
23
  class Struct(object):
24
  def __init__(self, **kwargs):
25
  for key, val in kwargs.items():
26
  setattr(self, key, val)
27
 
28
-
29
  def get_sen(i, num_video, i_frame, pos):
30
  if num_video == 1:
31
  sen = 'GT'
32
  elif num_video == 2:
33
  if i == 0:
34
- if pos == 1:
35
- sen = 'A'
36
- elif pos == 2:
37
- sen = 'B'
38
- else:
39
- sen = 'GT'
40
  else:
41
- if pos == 1:
42
- sen = 'B'
43
- elif pos == 2:
44
- sen = 'A'
45
- else:
46
- sen = 'result'
47
  elif num_video == 3:
48
- if i == 0:
49
- sen = 'sample1'
50
- elif i == 1:
51
- sen = 'interpolation'
52
- else:
53
- sen = 'sample2'
54
  elif num_video == 9 or num_video == 16:
55
- if i == 0:
56
- sen = 'frame '+str(i_frame)
57
- else:
58
- sen = 'sample' + str(i)
59
  elif num_video == 12:
60
- if i == 0:
61
- sen = 'sample1'
62
- elif i < 11:
63
- sen = 'interpolation' + str(i)
64
- else:
65
- sen = 'sample2'
66
-
67
  return sen
68
 
69
-
70
  def add_image_text(img, text, color=(0,0,255), w=800, h=800):
71
  font = cv2.FONT_HERSHEY_SIMPLEX
72
- textsize = cv2.getTextSize(text, font, 8, 2)[0]
73
- textX = (img.shape[1] - textsize[0]) // 2
74
- textY = textsize[1] + 10
75
- # img = img.copy()
76
- # a = img * 255
77
- # img = a.transpose(1, 2, 0).astype(np.uint8).copy()
78
- # cv2.putText(img, '%s' % (text), (textX, textY), font, 1, (0, 0, 0), 2, cv2.LINE_AA)
79
-
80
- # w = int(text)
81
-
82
- # img = img.transpose(1, 2, 0)
83
  img = np.require(img, dtype='f4', requirements=['O', 'W'])
84
  img.flags.writeable = True
85
  img1 = img.copy()
86
  img1 = cv2.putText(img1, '%s' % (text), (100, 100), font, 4, color, 2, 1)
87
- img1 = cv2.rectangle(img1, (0, 0), (w, h), color, thickness=3, )
88
-
89
- # img1 = img1.transpose(2, 0, 1)
90
-
91
  return img1
92
 
93
-
94
- class RenderTool():
95
  def __init__(self, out_path):
96
  path = os.path.join(os.getcwd(), 'visualise/smplx/SMPLX_NEUTRAL.npz')
97
  model_data = np.load(path, allow_pickle=True)
@@ -102,21 +59,13 @@ class RenderTool():
102
  os.makedirs(self.out_path)
103
 
104
  def _render_sequences(self, cur_wav_file, v_list, j=-1, stand=False, face=False, whole_body=False, run_in_parallel=False, transcript=None):
105
- # import sys
106
- # if sys.platform == 'win32':
107
  symbol = '/'
108
- # else:
109
- # symbol = '\\'
110
- print("Render {} {} sequence.".format(cur_wav_file.split(symbol)[-2],cur_wav_file.split(symbol)[-1]))
111
  if run_in_parallel:
112
  thread = threading.Thread(target=self._render_helper, args=(cur_wav_file, v_list))
113
  thread.start()
114
  thread.join()
115
  else:
116
- # directory = os.path.join(self.out_path, cur_wav_file.split(symbol)[-2])
117
- # if not os.path.exists(directory):
118
- # os.makedirs(directory)
119
- # video_fname = os.path.join(directory, '%s.mp4' % cur_wav_file.split(symbol)[-1].split('.')[-2])
120
  directory = os.path.join(self.out_path, cur_wav_file.split(symbol)[2].split(symbol)[0])
121
  if not os.path.exists(directory):
122
  os.makedirs(directory)
@@ -131,119 +80,53 @@ class RenderTool():
131
  def _render_sequences_helper(self, video_fname, cur_wav_file, v_list, stand, face, whole_body, transcript):
132
  num_frames = v_list[0].shape[0]
133
 
134
- # dataset is inverse
135
  for v in v_list:
136
  v = v.reshape(v.shape[0], -1, 3)
137
  v[:, :, 1] = -v[:, :, 1]
138
  v[:, :, 2] = -v[:, :, 2]
 
139
  viewport_height = 800
140
  z_offset = 1.0
141
  num_video = len(v_list)
142
- assert num_video in [1, 2, 3, 9, 12, 16, 18]
143
- if num_video == 1:
144
- width, height = 800, 800
145
- elif num_video == 2:
146
- width, height = 1600, 800
147
- elif num_video == 3:
148
- width, height = 2400, 800
149
- elif num_video == 9:
150
- width, height = 2400, 2400
151
- elif num_video == 12:
152
- width, height = 3200, 2400
153
- elif num_video == 16:
154
- width, height = 3200, 3200
155
- elif num_video == 18:
156
- width, height = 4800, 2400
157
 
158
  if whole_body:
159
  width, height = 800, 1440
160
  viewport_height = 1440
161
  z_offset = 1.8
162
 
163
- sr = 22000
164
  audio, sr = librosa.load(cur_wav_file, sr=16000)
165
  tmp_audio_file = tempfile.NamedTemporaryFile('w', suffix='.wav', dir=os.path.dirname(video_fname))
166
  tmp_audio_file.close()
167
  wavfile.write(tmp_audio_file.name, sr, audio)
168
  tmp_video_file = tempfile.NamedTemporaryFile('w', suffix='.mp4', dir=os.path.dirname(video_fname))
169
  tmp_video_file.close()
170
- if int(cv2.__version__[0]) < 3:
171
- print('cv2 < 3')
172
- writer = cv2.VideoWriter(tmp_video_file.name, cv2.cv.CV_FOURCC(*'mp4v'), 30, (width, height), True)
173
- else:
174
- print('cv2 >= 3')
175
- writer = cv2.VideoWriter(tmp_video_file.name, cv2.VideoWriter_fourcc(*'mp4v'), 30, (width, height), True)
176
 
177
- center = np.mean(v_list[0][0], axis=0)
178
 
179
- r = pyrender.OffscreenRenderer(viewport_width=800, viewport_height=viewport_height)
180
 
181
- # random exchange the position of our method and SG3D
182
- # pos = random.randint(1, 2)
183
- # video_fname = list(video_fname)
184
- # video_fname.insert(-4, str(pos))
185
- # video_fname = ''.join(video_fname)
186
- pos = 1
187
 
188
  for i_frame in tqdm(range(num_frames)):
189
- # pyrender.Viewer(scene)
190
  cur_img = []
191
  for i in range(len(v_list)):
192
- if face:
193
- img = render_mesh_helper((v_list[i][i_frame], self.f), center,
194
- r=r, xmag=0.15, y=1, z=1.0, camera='o')
195
- else:
196
- img = render_mesh_helper((v_list[i][i_frame], self.f), center, camera='o', r=r, y=0.7, z_offset=z_offset)
197
- # sen = get_sen(i, num_video, i_frame, pos)
198
- # if transcript is not None:
199
- # sen = str(int(transcript[i_frame].item()))
200
- # else:
201
- # sen = ' '
202
- # img = add_image_text(img, sen)
203
  cur_img.append(img)
204
 
205
- if num_video == 1:
206
- final_img = cur_img[0].astype(np.uint8)
207
- elif num_video == 2:
208
- final_img = np.hstack((cur_img[0], cur_img[1])).astype(np.uint8)
209
- elif num_video == 3:
210
- final_img = np.hstack((cur_img[0], cur_img[1], cur_img[2])).astype(np.uint8)
211
- elif num_video == 9:
212
- img_vert_0 = np.hstack((cur_img[0], cur_img[1], cur_img[2])).astype(np.uint8)
213
- img_vert_1 = np.hstack((cur_img[3], cur_img[4], cur_img[5])).astype(np.uint8)
214
- img_vert_2 = np.hstack((cur_img[6], cur_img[7], cur_img[8])).astype(np.uint8)
215
- final_img = np.vstack((img_vert_0, img_vert_1, img_vert_2)).astype(np.uint8)
216
- elif num_video == 12:
217
- img_vert_0 = np.hstack((cur_img[0], cur_img[1], cur_img[2], cur_img[3])).astype(np.uint8)
218
- img_vert_1 = np.hstack((cur_img[4], cur_img[5], cur_img[6], cur_img[7])).astype(np.uint8)
219
- img_vert_2 = np.hstack((cur_img[8], cur_img[9], cur_img[10], cur_img[11])).astype(np.uint8)
220
- final_img = np.vstack((img_vert_0, img_vert_1, img_vert_2)).astype(np.uint8)
221
- elif num_video == 16:
222
- img_vert_0 = np.hstack((cur_img[0], cur_img[1], cur_img[2], cur_img[3])).astype(np.uint8)
223
- img_vert_1 = np.hstack((cur_img[4], cur_img[5], cur_img[6], cur_img[7])).astype(np.uint8)
224
- img_vert_2 = np.hstack((cur_img[8], cur_img[9], cur_img[10], cur_img[11])).astype(np.uint8)
225
- img_vert_3 = np.hstack((cur_img[12], cur_img[13], cur_img[14], cur_img[15])).astype(np.uint8)
226
- final_img = np.vstack((img_vert_0, img_vert_1, img_vert_2, img_vert_3)).astype(np.uint8)
227
- elif num_video == 18:
228
- img_vert_0 = np.hstack((cur_img[0], cur_img[1], cur_img[2], cur_img[3], cur_img[4], cur_img[5])).astype(np.uint8)
229
- img_vert_1 = np.hstack((cur_img[6], cur_img[7], cur_img[8], cur_img[9], cur_img[10], cur_img[11])).astype(np.uint8)
230
- img_vert_2 = np.hstack((cur_img[12], cur_img[13], cur_img[14], cur_img[15], cur_img[16], cur_img[17])).astype(
231
- np.uint8)
232
- final_img = np.vstack((img_vert_0, img_vert_1, img_vert_2)).astype(np.uint8)
233
- # final_img = add_image_text(final_img, 'frame'+str(i_frame), w=width, h=height)
234
  writer.write(final_img)
235
  writer.release()
236
 
237
- cmd = ('ffmpeg' + ' -i {0} -i {1} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p {2}'.format(
238
  tmp_audio_file.name, tmp_video_file.name, video_fname)).split()
239
- # cmd = ('ffmpeg' + '-i {0} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p {1}'.format(
240
- # tmp_video_file.name, video_fname)).split()
241
  call(cmd)
242
  os.remove(tmp_audio_file.name)
243
  os.remove(tmp_video_file.name)
244
 
245
  def _render_continuity(self, cur_wav_file, pred, frame, run_in_parallel=False):
246
- print("Render {} {} sequence.".format(cur_wav_file.split(symbol)[-2],cur_wav_file.split(symbol)[-1]))
 
247
  if run_in_parallel:
248
  thread = threading.Thread(target=self._render_helper, args=(cur_wav_file, pred))
249
  thread.start()
@@ -252,53 +135,42 @@ class RenderTool():
252
  self._render_helper2(cur_wav_file, pred, frame)
253
 
254
  def _render_helper2(self, cur_wav_file, pred, frame):
255
- directory = os.path.join(self.out_path, cur_wav_file.split('/')[2].split(symbol)[0])
 
256
  if not os.path.exists(directory):
257
  os.makedirs(directory)
258
- video_fname = os.path.join(directory, '%s.mp4' % cur_wav_file.split(symbol)[-1].split('.')[-2].split('/')[-1])
259
  self._render_sequences_helper2(video_fname, cur_wav_file, pred, frame)
260
 
261
  def _render_sequences_helper2(self, video_fname, cur_wav_file, pred, frame):
262
-
263
  num_frames = pred.shape[0]
264
  pred = pred.reshape(pred.shape[0], -1, 3)
265
 
266
  pred[:, :, 1] = -pred[:, :, 1]
267
  pred[:, :, 2] = -pred[:, :, 2]
268
 
269
- sr = 22000
270
- audio, sr = load_wav_old(cur_wav_file, sr=sr)
271
  tmp_audio_file = tempfile.NamedTemporaryFile('w', suffix='.wav', dir=os.path.dirname(video_fname))
272
  tmp_audio_file.close()
273
  wavfile.write(tmp_audio_file.name, sr, audio)
274
  tmp_video_file = tempfile.NamedTemporaryFile('w', suffix='.mp4', dir=os.path.dirname(video_fname))
275
  tmp_video_file.close()
276
- if int(cv2.__version__[0]) < 3:
277
- print('cv2 < 3')
278
- writer = cv2.VideoWriter(tmp_video_file.name, cv2.cv.CV_FOURCC(*'mp4v'), 15, (190, 800), True)
279
- else:
280
- print('cv2 >= 3')
281
- writer = cv2.VideoWriter(tmp_video_file.name, cv2.VideoWriter_fourcc(*'mp4v'), 30, (800, 800), True)
282
 
283
  center = np.mean(pred[0], axis=0)
284
 
285
- r = pyrender.OffscreenRenderer(viewport_width=800, viewport_height=800)
286
 
287
  for i_frame in tqdm(range(num_frames)):
288
- if i_frame < frame:
289
- sen = 'sequence 1'
290
- color = (0,255,0)
291
- else:
292
- sen = 'sequence 2'
293
- color = (0, 0, 255)
294
- pred_img = render_mesh_helper(Mesh(pred[i_frame], self.template_mesh.f), center, camera='o',r=r, y=0.7)
295
- pred_img = add_image_text(pred_img, sen, color)
296
  pred_img = pred_img.astype(np.uint8)
297
  writer.write(pred_img)
298
  writer.release()
299
 
300
- cmd = ('ffmpeg' + ' -i {0} -i {1} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p {2}'.format(
301
  tmp_audio_file.name, tmp_video_file.name, video_fname)).split()
302
  call(cmd)
303
  os.remove(tmp_audio_file.name)
304
- os.remove(tmp_video_file.name)
 
1
  import random
 
2
  import cv2
3
  import os
 
4
  import tempfile
5
  import threading
6
  from subprocess import call
7
 
8
  import numpy as np
9
  from scipy.io import wavfile
 
10
 
11
+ from pyrender import OffscreenRenderer, DirectionalLight, Scene, Mesh
12
+ import trimesh
13
+ from PIL import Image
14
 
15
+ import librosa
16
  from tqdm import tqdm
17
 
 
18
  from data_utils.utils import load_wav_old
19
  from voca.rendering import render_mesh_helper
20
 
 
21
  class Struct(object):
22
  def __init__(self, **kwargs):
23
  for key, val in kwargs.items():
24
  setattr(self, key, val)
25
 
 
26
  def get_sen(i, num_video, i_frame, pos):
27
  if num_video == 1:
28
  sen = 'GT'
29
  elif num_video == 2:
30
  if i == 0:
31
+ sen = 'A' if pos == 1 else 'B'
 
 
 
 
 
32
  else:
33
+ sen = 'B' if pos == 1 else 'A'
 
 
 
 
 
34
  elif num_video == 3:
35
+ sen = ['sample1', 'interpolation', 'sample2'][i]
 
 
 
 
 
36
  elif num_video == 9 or num_video == 16:
37
+ sen = 'frame ' + str(i_frame) if i == 0 else 'sample' + str(i)
 
 
 
38
  elif num_video == 12:
39
+ sen = 'sample1' if i == 0 else 'sample2' if i == 11 else 'interpolation' + str(i)
 
 
 
 
 
 
40
  return sen
41
 
 
42
  def add_image_text(img, text, color=(0,0,255), w=800, h=800):
43
  font = cv2.FONT_HERSHEY_SIMPLEX
 
 
 
 
 
 
 
 
 
 
 
44
  img = np.require(img, dtype='f4', requirements=['O', 'W'])
45
  img.flags.writeable = True
46
  img1 = img.copy()
47
  img1 = cv2.putText(img1, '%s' % (text), (100, 100), font, 4, color, 2, 1)
48
+ img1 = cv2.rectangle(img1, (0, 0), (w, h), color, thickness=3)
 
 
 
49
  return img1
50
 
51
+ class RenderTool:
 
52
  def __init__(self, out_path):
53
  path = os.path.join(os.getcwd(), 'visualise/smplx/SMPLX_NEUTRAL.npz')
54
  model_data = np.load(path, allow_pickle=True)
 
59
  os.makedirs(self.out_path)
60
 
61
  def _render_sequences(self, cur_wav_file, v_list, j=-1, stand=False, face=False, whole_body=False, run_in_parallel=False, transcript=None):
 
 
62
  symbol = '/'
63
+ print("Render {} {} sequence.".format(cur_wav_file.split(symbol)[-2], cur_wav_file.split(symbol)[-1]))
 
 
64
  if run_in_parallel:
65
  thread = threading.Thread(target=self._render_helper, args=(cur_wav_file, v_list))
66
  thread.start()
67
  thread.join()
68
  else:
 
 
 
 
69
  directory = os.path.join(self.out_path, cur_wav_file.split(symbol)[2].split(symbol)[0])
70
  if not os.path.exists(directory):
71
  os.makedirs(directory)
 
80
  def _render_sequences_helper(self, video_fname, cur_wav_file, v_list, stand, face, whole_body, transcript):
81
  num_frames = v_list[0].shape[0]
82
 
 
83
  for v in v_list:
84
  v = v.reshape(v.shape[0], -1, 3)
85
  v[:, :, 1] = -v[:, :, 1]
86
  v[:, :, 2] = -v[:, :, 2]
87
+
88
  viewport_height = 800
89
  z_offset = 1.0
90
  num_video = len(v_list)
91
+ width, height = (800, 800) if num_video == 1 else (1600, 800) if num_video == 2 else (2400, 800)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  if whole_body:
94
  width, height = 800, 1440
95
  viewport_height = 1440
96
  z_offset = 1.8
97
 
 
98
  audio, sr = librosa.load(cur_wav_file, sr=16000)
99
  tmp_audio_file = tempfile.NamedTemporaryFile('w', suffix='.wav', dir=os.path.dirname(video_fname))
100
  tmp_audio_file.close()
101
  wavfile.write(tmp_audio_file.name, sr, audio)
102
  tmp_video_file = tempfile.NamedTemporaryFile('w', suffix='.mp4', dir=os.path.dirname(video_fname))
103
  tmp_video_file.close()
 
 
 
 
 
 
104
 
105
+ writer = cv2.VideoWriter(tmp_video_file.name, cv2.VideoWriter_fourcc(*'mp4v'), 30, (width, height), True)
106
 
107
+ center = np.mean(v_list[0][0], axis=0)
108
 
109
+ r = OffscreenRenderer(viewport_width=800, viewport_height=viewport_height)
 
 
 
 
 
110
 
111
  for i_frame in tqdm(range(num_frames)):
 
112
  cur_img = []
113
  for i in range(len(v_list)):
114
+ img = render_mesh_helper((v_list[i][i_frame], self.f), center, r=r, camera='o', y=0.7, z_offset=z_offset)
 
 
 
 
 
 
 
 
 
 
115
  cur_img.append(img)
116
 
117
+ final_img = np.hstack(cur_img).astype(np.uint8) if num_video in [2, 3] else cur_img[0].astype(np.uint8)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  writer.write(final_img)
119
  writer.release()
120
 
121
+ cmd = ('ffmpeg -i {0} -i {1} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p {2}'.format(
122
  tmp_audio_file.name, tmp_video_file.name, video_fname)).split()
 
 
123
  call(cmd)
124
  os.remove(tmp_audio_file.name)
125
  os.remove(tmp_video_file.name)
126
 
127
  def _render_continuity(self, cur_wav_file, pred, frame, run_in_parallel=False):
128
+ symbol = '/'
129
+ print("Render {} {} sequence.".format(cur_wav_file.split(symbol)[-2], cur_wav_file.split(symbol)[-1]))
130
  if run_in_parallel:
131
  thread = threading.Thread(target=self._render_helper, args=(cur_wav_file, pred))
132
  thread.start()
 
135
  self._render_helper2(cur_wav_file, pred, frame)
136
 
137
  def _render_helper2(self, cur_wav_file, pred, frame):
138
+ symbol = '/'
139
+ directory = os.path.join(self.out_path, cur_wav_file.split(symbol)[2].split(symbol)[0])
140
  if not os.path.exists(directory):
141
  os.makedirs(directory)
142
+ video_fname = os.path.join(directory, '%s.mp4' % cur_wav_file.split(symbol)[-1].split('.')[-2].split(symbol)[-1])
143
  self._render_sequences_helper2(video_fname, cur_wav_file, pred, frame)
144
 
145
  def _render_sequences_helper2(self, video_fname, cur_wav_file, pred, frame):
 
146
  num_frames = pred.shape[0]
147
  pred = pred.reshape(pred.shape[0], -1, 3)
148
 
149
  pred[:, :, 1] = -pred[:, :, 1]
150
  pred[:, :, 2] = -pred[:, :, 2]
151
 
152
+ audio, sr = load_wav_old(cur_wav_file, sr=22000)
 
153
  tmp_audio_file = tempfile.NamedTemporaryFile('w', suffix='.wav', dir=os.path.dirname(video_fname))
154
  tmp_audio_file.close()
155
  wavfile.write(tmp_audio_file.name, sr, audio)
156
  tmp_video_file = tempfile.NamedTemporaryFile('w', suffix='.mp4', dir=os.path.dirname(video_fname))
157
  tmp_video_file.close()
158
+
159
+ writer = cv2.VideoWriter(tmp_video_file.name, cv2.VideoWriter_fourcc(*'mp4v'), 30, (800, 800), True)
 
 
 
 
160
 
161
  center = np.mean(pred[0], axis=0)
162
 
163
+ r = OffscreenRenderer(viewport_width=800, viewport_height=800)
164
 
165
  for i_frame in tqdm(range(num_frames)):
166
+ pred_img = render_mesh_helper((pred[i_frame], self.f), center, camera='o', r=r, y=0.7)
167
+ pred_img = add_image_text(pred_img, 'frame{}'.format(i_frame))
 
 
 
 
 
 
168
  pred_img = pred_img.astype(np.uint8)
169
  writer.write(pred_img)
170
  writer.release()
171
 
172
+ cmd = ('ffmpeg -i {0} -i {1} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p {2}'.format(
173
  tmp_audio_file.name, tmp_video_file.name, video_fname)).split()
174
  call(cmd)
175
  os.remove(tmp_audio_file.name)
176
+ os.remove(tmp_video_file.name)