"""FFmpeg/OpenCV helpers: probe videos, rescale/crop/clip, burn in text,
stack videos side by side or vertically, build GIFs, and draw tracking
overlays on frames."""

import os
import os.path as osp
import platform
import random
import subprocess

import cv2 as cv
import numpy as np
import seaborn as sns
from PIL import ImageColor

# Prefer the canonical binary when it exists; otherwise rely on $PATH.
FFMPEG_PATH = "/usr/bin/ffmpeg" if osp.exists("/usr/bin/ffmpeg") else "ffmpeg"

# Per-OS font used by ffmpeg's drawtext filter.
font_files = {
    "Windows": "C:/Windows/Fonts/arial.ttf",
    "Linux": "/usr/share/fonts/truetype/lato/Lato-Regular.ttf",
    "Darwin": "/System/Library/Fonts/Supplemental/Arial.ttf",
}


def _ensure_parent_dir(path):
    """Create the parent directory of *path* if it has one.

    Guards against ``os.makedirs("")`` which raises when *path* is a bare
    filename with no directory component.
    """
    parent = osp.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def _run_ffmpeg(cmd, verbose):
    """Run an ffmpeg command list; append quiet flags when not verbose.

    Returns the CompletedProcess so callers can inspect the return code.
    """
    if not verbose:
        # ffmpeg accepts these global options even after the output file.
        cmd = cmd + ["-hide_banner", "-loglevel", "error"]
    return subprocess.run(cmd)


def get_video_width_height(video_file):
    """Return (width, height) in pixels of the first video stream."""
    vcap = cv.VideoCapture(video_file)
    try:
        img_w = int(vcap.get(cv.CAP_PROP_FRAME_WIDTH))
        img_h = int(vcap.get(cv.CAP_PROP_FRAME_HEIGHT))
    finally:
        vcap.release()
    return img_w, img_h


def get_video_num_fr(video_file):
    """Return the (container-reported) number of frames in the video."""
    vcap = cv.VideoCapture(video_file)
    try:
        num_fr = int(vcap.get(cv.CAP_PROP_FRAME_COUNT))
    finally:
        vcap.release()
    return num_fr


def get_video_fps(video_file):
    """Return the frame rate of the video as a float."""
    vcap = cv.VideoCapture(video_file)
    try:
        fps = vcap.get(cv.CAP_PROP_FPS)
    finally:
        vcap.release()
    return fps


def rescale_video(video_path, out_path, width=-1, height=-1, verbose=True):
    """Rescale a video with ffmpeg; -1 keeps that dimension's aspect ratio."""
    _ensure_parent_dir(out_path)
    cmd = [
        FFMPEG_PATH,
        "-y",
        "-i",
        video_path,
        "-vf",
        f"scale={width}:{height}",
        out_path,
    ]
    _run_ffmpeg(cmd, verbose)


def crop_video(
    video_path, out_path, width="iw", height="ih", x=0, y=0, crop_str=None, verbose=True
):
    """Crop a video. Either pass width/height/x/y (ffmpeg expressions allowed)
    or a pre-built ``crop_str`` of the form "w:h:x:y"."""
    _ensure_parent_dir(out_path)
    if crop_str is None:
        crop_str = f"{width}:{height}:{x}:{y}"
    cmd = [FFMPEG_PATH, "-y", "-i", video_path, "-vf", f"crop={crop_str}", out_path]
    _run_ffmpeg(cmd, verbose)


def clip_video(video_path, out_path, start, end, verbose=True):
    """Extract the [start, end) segment (seconds) of a video."""
    _ensure_parent_dir(out_path)
    cmd = [
        FFMPEG_PATH,
        "-y",
        "-ss",
        f"{start}",
        "-i",
        video_path,
        "-t",
        f"{end - start}",
        out_path,
    ]
    _run_ffmpeg(cmd, verbose)


def text_video(
    video_path,
    out_path,
    text,
    x=10,
    y=30,
    verbose=True,
    text_color="white",
    text_size=60,
):
    """Burn a text label into a video at pixel position (x, y)."""
    _ensure_parent_dir(out_path)
    font_file = font_files[platform.system()]
    draw_str = (
        f"drawtext=fontsize={text_size}:fontfile={font_file}"
        f":fontcolor={text_color}:text='{text}':x={x}:y={y}"
    )
    cmd = [FFMPEG_PATH, "-y", "-i", video_path, "-vf", draw_str, out_path]
    _run_ffmpeg(cmd, verbose)


def make_gif(
    video_path,
    out_path,
    t_start="00:00:00",
    t_end=None,
    width=600,
    height=-1,
    verbose=True,
):
    """Convert a video (or a slice of it) into a looping palette-optimized GIF.

    NOTE: ``t_end`` is passed to ffmpeg's ``-t`` option, so it is a *duration*
    from ``t_start``, not an absolute end timestamp.
    """
    _ensure_parent_dir(out_path)
    cmd = (
        [FFMPEG_PATH, "-y", "-i", video_path, "-ss", t_start]
        + (["-t", t_end] if t_end is not None else [])
        + [
            "-vf",
            f"fps=30,scale={width}:{height}:flags=lanczos,"
            "split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse",
            "-loop",
            "0",
            out_path,
        ]
    )
    _run_ffmpeg(cmd, verbose)


def images_to_video(
    img_dir, out_path, img_fmt="%06d.jpg", fps=30, crf=25, verbose=True
):
    """Encode an image sequence (printf-style ``img_fmt``) into an H.264 video.

    Raises RuntimeError if ffmpeg exits with a non-zero status.
    """
    _ensure_parent_dir(out_path)
    cmd = [
        FFMPEG_PATH,
        "-y",
        "-r",
        f"{fps}",
        "-f",
        "image2",
        "-start_number",
        "0",
        "-i",
        f"{img_dir}/{img_fmt}",
        "-vcodec",
        "libx264",
        "-vf",
        # libx264 requires even dimensions; pad odd sizes up by one pixel.
        "pad=ceil(iw/2)*2:ceil(ih/2)*2",
        "-crf",
        f"{crf}",
        "-pix_fmt",
        "yuv420p",
        out_path,
    ]
    p = _run_ffmpeg(cmd, verbose)
    if p.returncode != 0:
        raise RuntimeError("Something went wrong during images_to_video!")


def video_to_images(video_path, out_path, img_fmt="%06d.jpg", fps=30, verbose=True):
    """Dump a video to numbered image files in directory ``out_path``.

    Raises RuntimeError if ffmpeg exits with a non-zero status.
    """
    os.makedirs(out_path, exist_ok=True)
    cmd = [FFMPEG_PATH, "-i", video_path, "-r", f"{fps}", f"{out_path}/{img_fmt}"]
    p = _run_ffmpeg(cmd, verbose)
    if p.returncode != 0:
        raise RuntimeError("Something went wrong during video_to_images!")


def _stack_pair(stack_filter, video1_path, video2_path, out_path, crf, verbose, draw_str):
    """Stack two videos with ``stack_filter`` ("hstack"/"vstack"); when
    ``draw_str`` is given, burn it in with a second ffmpeg pass via a temp file."""
    _ensure_parent_dir(out_path)
    write_text = draw_str is not None
    tmp_file = f"{osp.splitext(out_path)[0]}_tmp.mp4" if write_text else None
    cmd = [
        FFMPEG_PATH,
        "-y",
        "-i",
        video1_path,
        "-i",
        video2_path,
        "-filter_complex",
        f"{stack_filter},format=yuv420p",
        "-vcodec",
        "libx264",
        "-crf",
        f"{crf}",
        tmp_file if write_text else out_path,
    ]
    _run_ffmpeg(cmd, verbose)
    if write_text:
        cmd = [
            FFMPEG_PATH,
            "-i",
            tmp_file,
            "-y",
            "-vf",
            draw_str,
            "-c:a",
            "copy",
            out_path,
        ]
        _run_ffmpeg(cmd, verbose)
        os.remove(tmp_file)


def hstack_videos(
    video1_path,
    video2_path,
    out_path,
    crf=25,
    verbose=True,
    text1=None,
    text2=None,
    text_color="white",
    text_size=60,
):
    """Place two videos side by side; optionally label each half (both
    ``text1`` and ``text2`` must be given for labels to be drawn)."""
    draw_str = None
    if not (text1 is None or text2 is None):
        font_file = font_files[platform.system()]
        draw_str = (
            f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{text1}':x=(w-text_w)/4:y=20,"
            f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{text2}':x=3*(w-text_w)/4:y=20"
        )
    _stack_pair("hstack", video1_path, video2_path, out_path, crf, verbose, draw_str)


def vstack_videos(
    video1_path,
    video2_path,
    out_path,
    crf=25,
    verbose=True,
    text1=None,
    text2=None,
    text_color="white",
    text_size=60,
):
    """Stack two videos vertically; optionally label each half (both
    ``text1`` and ``text2`` must be given for labels to be drawn)."""
    draw_str = None
    if not (text1 is None or text2 is None):
        font_file = font_files[platform.system()]
        draw_str = (
            f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{text1}':x=10:y=20,"
            f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{text2}':x=10:y=h/2+20"
        )
    _stack_pair("vstack", video1_path, video2_path, out_path, crf, verbose, draw_str)


def _stack_video_chain(stack_filter, video_arr, out_path, crf, verbose, draw_str):
    """Fold a list of videos pairwise with ``stack_filter`` using two
    alternating temp files; optionally burn in ``draw_str`` on the result."""
    assert len(video_arr) > 1
    base = osp.splitext(out_path)[0]
    tmp_file1 = f"{base}_tmp1.mp4"
    tmp_file2 = f"{base}_tmp2.mp4"
    _ensure_parent_dir(out_path)
    for i in range(1, len(video_arr)):
        prev_video = video_arr[0] if i == 1 else tmp_file1
        cmd = [
            FFMPEG_PATH,
            "-y",
            "-i",
            prev_video,
            "-i",
            video_arr[i],
            "-filter_complex",
            f"{stack_filter},format=yuv420p",
            "-vcodec",
            "libx264",
            "-crf",
            f"{crf}",
            tmp_file2,
        ]
        _run_ffmpeg(cmd, verbose)
        # The latest result always lives in tmp_file1 after the swap.
        tmp_file1, tmp_file2 = tmp_file2, tmp_file1
    if draw_str is not None:
        cmd = [
            FFMPEG_PATH,
            "-i",
            tmp_file1,
            "-y",
            "-vf",
            draw_str,
            "-c:a",
            "copy",
            out_path,
        ]
        _run_ffmpeg(cmd, verbose)
    else:
        os.rename(tmp_file1, out_path)
    for tmp in (tmp_file1, tmp_file2):
        if osp.exists(tmp):
            os.remove(tmp)


def vstack_video_arr(
    video_arr,
    out_path,
    crf=25,
    verbose=True,
    text_arr=None,
    text_color="white",
    text_size=60,
):
    """Vertically stack a list of videos; optionally label each row with the
    corresponding entry of ``text_arr``."""
    draw_str = None
    if text_arr is not None:
        # Offsets of each row's top edge, used to position its label.
        height = np.array([get_video_width_height(x)[1] for x in video_arr])
        start_h = np.concatenate([np.array([0]), np.cumsum(height)[:-1]])
        font_file = font_files[platform.system()]
        draw_str = ",".join(
            [
                f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{x}':x=10:y={h}+20"
                for h, x in zip(start_h, text_arr)
            ]
        )
    _stack_video_chain("vstack", video_arr, out_path, crf, verbose, draw_str)


def hstack_video_arr(
    video_arr,
    out_path,
    crf=25,
    verbose=True,
    text_arr=None,
    text_color="white",
    text_size=60,
):
    """Horizontally stack a list of videos; optionally label each column with
    the corresponding entry of ``text_arr``."""
    draw_str = None
    if text_arr is not None:
        # Offsets of each column's left edge, used to position its label.
        width = np.array([get_video_width_height(x)[0] for x in video_arr])
        start_w = np.concatenate([np.array([0]), np.cumsum(width)[:-1]])
        font_file = font_files[platform.system()]
        draw_str = ",".join(
            [
                f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{x}':x={w}+10:y=20"
                for w, x in zip(start_w, text_arr)
            ]
        )
    _stack_video_chain("hstack", video_arr, out_path, crf, verbose, draw_str)


def make_checker_board_texture(
    color1="black", color2="white", width=10, height=10, n_tile=15, to_bgr=False
):
    """Build an (n_tile*height//2*2, n_tile*width//2*2, 3) uint8 checkerboard
    texture alternating ``color1``/``color2`` cells of half-width/half-height.

    ``to_bgr`` swaps the channel order for OpenCV consumers.
    """
    c1 = np.asarray(ImageColor.getcolor(color1, "RGB")).astype(np.uint8)
    c2 = np.asarray(ImageColor.getcolor(color2, "RGB")).astype(np.uint8)
    if to_bgr:
        c1 = c1[[2, 1, 0]]
        c2 = c2[[2, 1, 0]]
    hw = width // 2
    hh = height // 2
    c1_block = np.tile(c1, (hh, hw, 1))
    c2_block = np.tile(c2, (hh, hw, 1))
    tex = np.block([[[c1_block], [c2_block]], [[c2_block], [c1_block]]])
    tex = np.tile(tex, (n_tile, n_tile, 1))
    return tex


def resize_bbox(bbox, scale):
    """Scale (x1, y1, x2, y2) boxes about their centers by ``scale``.

    Works on a single box or any leading batch shape (last axis is 4).
    Returns a new array; the input is not modified.
    """
    x1, y1, x2, y2 = bbox[..., 0], bbox[..., 1], bbox[..., 2], bbox[..., 3]
    h, w = y2 - y1, x2 - x1
    cx, cy = x1 + 0.5 * w, y1 + 0.5 * h
    h_new, w_new = h * scale, w * scale
    x1_new, x2_new = cx - 0.5 * w_new, cx + 0.5 * w_new
    y1_new, y2_new = cy - 0.5 * h_new, cy + 0.5 * h_new
    bbox_new = np.stack([x1_new, y1_new, x2_new, y2_new], axis=-1)
    return bbox_new


def nparray_to_vtk_matrix(array):
    """Convert a numpy.ndarray to a vtk.vtkMatrix4x4."""
    import vtk

    matrix = vtk.vtkMatrix4x4()
    for i in range(array.shape[0]):
        for j in range(array.shape[1]):
            matrix.SetElement(i, j, array[i, j])
    return matrix


def vtk_matrix_to_nparray(matrix):
    """Convert a vtk.vtkMatrix4x4 to a (4, 4) numpy.ndarray."""
    array = np.zeros([4, 4])
    for i in range(array.shape[0]):
        for j in range(array.shape[1]):
            array[i, j] = matrix.GetElement(i, j)
    return array


def random_color(seed):
    """Deterministically pick a seaborn palette color for ``seed``.

    Uses a local Random instance so the global random state is untouched;
    the choice is identical to seeding the global RNG with ``seed``.
    """
    rng = random.Random(seed)
    colors = sns.color_palette()
    return rng.choice(colors)


def draw_tracks(
    img, bbox, idx, score, thickness=2, font_scale=0.4, text_height=10, text_width=15
):
    """Draw one track's box, id, and score on ``img`` in place (mmtracking style).

    The box color is deterministic per track ``idx``. Returns ``img``.
    """
    x1, y1, x2, y2 = bbox.astype(np.int32)
    # bbox
    bbox_color = random_color(idx)
    bbox_color = [int(255 * _c) for _c in bbox_color][::-1]  # RGB -> BGR
    cv.rectangle(img, (x1, y1), (x2, y2), bbox_color, thickness=thickness)
    # id, on a solid background strip just inside the box top edge
    text = str(idx)
    width = len(text) * text_width
    img[y1 : y1 + text_height, x1 : x1 + width, :] = bbox_color
    cv.putText(
        img,
        str(idx),
        (x1, y1 + text_height - 2),
        cv.FONT_HERSHEY_COMPLEX,
        font_scale,
        color=(0, 0, 0),
    )
    # score, on a strip just above the box top edge
    text = "{:.02f}".format(score)
    width = len(text) * text_width
    img[y1 - text_height : y1, x1 : x1 + width, :] = bbox_color
    cv.putText(
        img, text, (x1, y1 - 2), cv.FONT_HERSHEY_COMPLEX, font_scale, color=(0, 0, 0)
    )
    return img


def draw_keypoints(img, keypoints, confidence, size=4, color=(255, 0, 255)):
    """Draw filled circles on ``img`` for keypoints with confidence > 0.2.

    Modifies ``img`` in place and returns it.
    """
    for kp, conf in zip(keypoints, confidence):
        if conf > 0.2:
            cv.circle(
                img, np.round(kp).astype(int).tolist(), size, color=color, thickness=-1
            )
    return img