# NOTE: Hugging Face Hub page residue (not code) preserved from the upload:
#   zirobtc's picture — "Upload folder using huggingface_hub" — commit fbb20ff (verified)
import os
import os.path as osp
import platform
import random
import subprocess
import cv2 as cv
import numpy as np
import seaborn as sns
from PIL import ImageColor
# Prefer the system ffmpeg binary when it exists; otherwise fall back to a
# bare "ffmpeg" and let the OS resolve it via PATH.
FFMPEG_PATH = "/usr/bin/ffmpeg" if osp.exists("/usr/bin/ffmpeg") else "ffmpeg"
# Per-OS TrueType font paths used by ffmpeg's drawtext filter.
# Keyed by platform.system() ("Windows" / "Linux" / "Darwin").
font_files = {
    "Windows": "C:/Windows/Fonts/arial.ttf",
    "Linux": "/usr/share/fonts/truetype/lato/Lato-Regular.ttf",
    "Darwin": "/System/Library/Fonts/Supplemental/Arial.ttf",
}
def get_video_width_height(video_file):
    """Return (width, height) in pixels of the video at `video_file`.

    Uses named OpenCV property constants instead of the magic ids 3 / 4,
    and releases the capture handle (the original leaked it).
    """
    vcap = cv.VideoCapture(video_file)
    try:
        img_w = int(vcap.get(cv.CAP_PROP_FRAME_WIDTH))
        img_h = int(vcap.get(cv.CAP_PROP_FRAME_HEIGHT))
    finally:
        vcap.release()
    return img_w, img_h
def get_video_num_fr(video_file):
    """Return the frame count of the video at `video_file`.

    Releases the capture handle (the original leaked it).
    """
    vcap = cv.VideoCapture(video_file)
    try:
        num_fr = int(vcap.get(cv.CAP_PROP_FRAME_COUNT))
    finally:
        vcap.release()
    return num_fr
def get_video_fps(video_file):
    """Return the frames-per-second of the video at `video_file` as a float.

    Releases the capture handle (the original leaked it).
    """
    vcap = cv.VideoCapture(video_file)
    try:
        fps = vcap.get(cv.CAP_PROP_FPS)
    finally:
        vcap.release()
    return fps
def rescale_video(video_path, out_path, width=-1, height=-1, verbose=True):
    """Rescale a video with ffmpeg's scale filter.

    Pass -1 for `width` or `height` to keep the aspect ratio (ffmpeg semantics).
    """
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    cmd = [FFMPEG_PATH, "-y"]
    if not verbose:
        # These must precede the output path: the original appended them
        # after out_path, where ffmpeg treats them as trailing options and
        # ignores them, so verbose=False had no effect.
        cmd += ["-hide_banner", "-loglevel", "error"]
    cmd += ["-i", video_path, "-vf", f"scale={width}:{height}", out_path]
    subprocess.run(cmd)
def crop_video(
    video_path, out_path, width="iw", height="ih", x=0, y=0, crop_str=None, verbose=True
):
    """Crop a video with ffmpeg's crop filter.

    `crop_str` ("w:h:x:y"), when given, overrides width/height/x/y.
    """
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    if crop_str is None:
        crop_str = f"{width}:{height}:{x}:{y}"
    cmd = [FFMPEG_PATH, "-y"]
    if not verbose:
        # Must precede the output path; ffmpeg ignores trailing options.
        cmd += ["-hide_banner", "-loglevel", "error"]
    cmd += ["-i", video_path, "-vf", f"crop={crop_str}", out_path]
    subprocess.run(cmd)
def clip_video(video_path, out_path, start, end, verbose=True):
    """Extract the clip from `start` to `end` (seconds) into `out_path`.

    `-ss` before `-i` requests fast input seeking; `-t` is the duration.
    """
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    cmd = [FFMPEG_PATH, "-y"]
    if not verbose:
        # Must precede the output path; ffmpeg ignores trailing options.
        cmd += ["-hide_banner", "-loglevel", "error"]
    cmd += ["-ss", f"{start}", "-i", video_path, "-t", f"{end - start}", out_path]
    subprocess.run(cmd)
def text_video(
    video_path,
    out_path,
    text,
    x=10,
    y=30,
    verbose=True,
    text_color="white",
    text_size=60,
):
    """Burn `text` onto a video at pixel position (x, y) via ffmpeg drawtext.

    The font is looked up per-OS in the module-level `font_files` table.
    """
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    font_file = font_files[platform.system()]
    draw_str = f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{text}':x={x}:y={y}"
    cmd = [FFMPEG_PATH, "-y"]
    if not verbose:
        # Must precede the output path; ffmpeg ignores trailing options.
        cmd += ["-hide_banner", "-loglevel", "error"]
    cmd += ["-i", video_path, "-vf", draw_str, out_path]
    subprocess.run(cmd)
def make_gif(
    video_path,
    out_path,
    t_start="00:00:00",
    t_end=None,
    width=600,
    height=-1,
    verbose=True,
):
    """Convert a video segment into a palette-optimized looping GIF at 30 fps.

    NOTE(review): `t_end` is forwarded to ffmpeg's `-t`, which is a DURATION,
    not an end timestamp — confirm callers pass a duration here.
    """
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    cmd = [FFMPEG_PATH, "-y"]
    if not verbose:
        # Must precede the output path; ffmpeg ignores trailing options.
        cmd += ["-hide_banner", "-loglevel", "error"]
    cmd += ["-i", video_path, "-ss", t_start]
    if t_end is not None:
        # f-string so numeric durations don't crash subprocess (argv must be str)
        cmd += ["-t", f"{t_end}"]
    cmd += [
        "-vf",
        f"fps=30,scale={width}:{height}:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse",
        "-loop",
        "0",
        out_path,
    ]
    subprocess.run(cmd)
def images_to_video(
    img_dir, out_path, img_fmt="%06d.jpg", fps=30, crf=25, verbose=True
):
    """Encode an image sequence `img_dir/img_fmt` (starting at 0) into an H.264 video.

    Pads width/height to even values (required by yuv420p).
    Raises Exception if ffmpeg exits non-zero.
    """
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    cmd = [FFMPEG_PATH, "-y"]
    if not verbose:
        # Must precede the output path; ffmpeg ignores trailing options.
        cmd += ["-hide_banner", "-loglevel", "error"]
    cmd += [
        "-r",
        f"{fps}",
        "-f",
        "image2",
        "-start_number",
        "0",
        "-i",
        f"{img_dir}/{img_fmt}",
        "-vcodec",
        "libx264",
        "-vf",
        "pad=ceil(iw/2)*2:ceil(ih/2)*2",
        "-crf",
        f"{crf}",
        "-pix_fmt",
        "yuv420p",
        out_path,
    ]
    p = subprocess.run(cmd)
    if p.returncode != 0:
        raise Exception("Something went wrong during images_to_video!")
def video_to_images(video_path, out_path, img_fmt="%06d.jpg", fps=30, verbose=True):
    """Dump a video to an image sequence `out_path/img_fmt` at the given fps.

    Raises Exception if ffmpeg exits non-zero.
    """
    os.makedirs(out_path, exist_ok=True)
    cmd = [FFMPEG_PATH]
    if not verbose:
        # Must precede the output pattern; ffmpeg ignores trailing options.
        cmd += ["-hide_banner", "-loglevel", "error"]
    cmd += ["-i", video_path, "-r", f"{fps}", f"{out_path}/{img_fmt}"]
    p = subprocess.run(cmd)
    if p.returncode != 0:
        raise Exception("Something went wrong during video_to_images!")
def hstack_videos(
    video1_path,
    video2_path,
    out_path,
    crf=25,
    verbose=True,
    text1=None,
    text2=None,
    text_color="white",
    text_size=60,
):
    """Stack two videos side by side; optionally draw a label over each half.

    Labels are burned in a second ffmpeg pass over a temporary file, which is
    removed afterwards.
    """
    write_text = text1 is not None and text2 is not None
    tmp_file = f"{osp.splitext(out_path)[0]}_tmp.mp4" if write_text else None
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    # Quiet flags must precede the output path; the original appended them
    # after it, where ffmpeg ignores them as trailing options.
    quiet = [] if verbose else ["-hide_banner", "-loglevel", "error"]
    cmd = [
        FFMPEG_PATH,
        "-y",
        *quiet,
        "-i",
        video1_path,
        "-i",
        video2_path,
        "-filter_complex",
        "hstack,format=yuv420p",
        "-vcodec",
        "libx264",
        "-crf",
        f"{crf}",
        tmp_file if write_text else out_path,
    ]
    subprocess.run(cmd)
    if write_text:
        font_file = font_files[platform.system()]
        draw_str = (
            f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{text1}':x=(w-text_w)/4:y=20,"
            f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{text2}':x=3*(w-text_w)/4:y=20"
        )
        cmd = [
            FFMPEG_PATH,
            "-y",
            *quiet,
            "-i",
            tmp_file,
            "-vf",
            draw_str,
            "-c:a",
            "copy",
            out_path,
        ]
        subprocess.run(cmd)
        # Guarded: the tmp file may be missing if the first pass failed.
        if osp.exists(tmp_file):
            os.remove(tmp_file)
def vstack_videos(
    video1_path,
    video2_path,
    out_path,
    crf=25,
    verbose=True,
    text1=None,
    text2=None,
    text_color="white",
    text_size=60,
):
    """Stack two videos top/bottom; optionally draw a label over each half.

    Labels are burned in a second ffmpeg pass over a temporary file, which is
    removed afterwards.
    """
    write_text = text1 is not None and text2 is not None
    tmp_file = f"{osp.splitext(out_path)[0]}_tmp.mp4" if write_text else None
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    # Quiet flags must precede the output path; the original appended them
    # after it, where ffmpeg ignores them as trailing options.
    quiet = [] if verbose else ["-hide_banner", "-loglevel", "error"]
    cmd = [
        FFMPEG_PATH,
        "-y",
        *quiet,
        "-i",
        video1_path,
        "-i",
        video2_path,
        "-filter_complex",
        "vstack,format=yuv420p",
        "-vcodec",
        "libx264",
        "-crf",
        f"{crf}",
        tmp_file if write_text else out_path,
    ]
    subprocess.run(cmd)
    if write_text:
        font_file = font_files[platform.system()]
        draw_str = (
            f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{text1}':x=10:y=20,"
            f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{text2}':x=10:y=h/2+20"
        )
        cmd = [
            FFMPEG_PATH,
            "-y",
            *quiet,
            "-i",
            tmp_file,
            "-vf",
            draw_str,
            "-c:a",
            "copy",
            out_path,
        ]
        subprocess.run(cmd)
        # Guarded: the tmp file may be missing if the first pass failed.
        if osp.exists(tmp_file):
            os.remove(tmp_file)
def vstack_video_arr(
    video_arr,
    out_path,
    crf=25,
    verbose=True,
    text_arr=None,
    text_color="white",
    text_size=60,
):
    """Vertically stack two or more videos, optionally drawing one label per video.

    Stacks pairwise, ping-ponging between two temp files; label y-offsets are
    the cumulative heights of the inputs so each label lands on its own video.
    """
    assert len(video_arr) > 1
    stem = osp.splitext(out_path)[0]
    tmp_file1 = f"{stem}_tmp1.mp4"
    tmp_file2 = f"{stem}_tmp2.mp4"
    heights = np.array([get_video_width_height(v)[1] for v in video_arr])
    start_h = np.concatenate([np.array([0]), np.cumsum(heights)[:-1]])
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    # Quiet flags must precede the output path; the original appended them
    # after it, where ffmpeg ignores them as trailing options.
    quiet = [] if verbose else ["-hide_banner", "-loglevel", "error"]
    for i in range(1, len(video_arr)):
        prev_video = video_arr[0] if i == 1 else tmp_file1
        cmd = [
            FFMPEG_PATH,
            "-y",
            *quiet,
            "-i",
            prev_video,
            "-i",
            video_arr[i],
            "-filter_complex",
            "vstack,format=yuv420p",
            "-vcodec",
            "libx264",
            "-crf",
            f"{crf}",
            tmp_file2,
        ]
        subprocess.run(cmd)
        tmp_file1, tmp_file2 = tmp_file2, tmp_file1
    if text_arr is not None:
        font_file = font_files[platform.system()]
        draw_str = ",".join(
            [
                f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{x}':x=10:y={h}+20"
                for h, x in zip(start_h, text_arr)
            ]
        )
        cmd = [
            FFMPEG_PATH,
            "-y",
            *quiet,
            "-i",
            tmp_file1,
            "-vf",
            draw_str,
            "-c:a",
            "copy",
            out_path,
        ]
        subprocess.run(cmd)
    else:
        os.rename(tmp_file1, out_path)
    for tmp in (tmp_file1, tmp_file2):
        if os.path.exists(tmp):
            os.remove(tmp)
def hstack_video_arr(
    video_arr,
    out_path,
    crf=25,
    verbose=True,
    text_arr=None,
    text_color="white",
    text_size=60,
):
    """Horizontally stack two or more videos, optionally drawing one label per video.

    Stacks pairwise, ping-ponging between two temp files; label x-offsets are
    the cumulative widths of the inputs so each label lands on its own video.
    """
    assert len(video_arr) > 1
    stem = osp.splitext(out_path)[0]
    tmp_file1 = f"{stem}_tmp1.mp4"
    tmp_file2 = f"{stem}_tmp2.mp4"
    widths = np.array([get_video_width_height(v)[0] for v in video_arr])
    start_w = np.concatenate([np.array([0]), np.cumsum(widths)[:-1]])
    out_dir = osp.dirname(out_path)
    if out_dir:  # os.makedirs("") raises when out_path is a bare filename
        os.makedirs(out_dir, exist_ok=True)
    # Quiet flags must precede the output path; the original appended them
    # after it, where ffmpeg ignores them as trailing options.
    quiet = [] if verbose else ["-hide_banner", "-loglevel", "error"]
    for i in range(1, len(video_arr)):
        prev_video = video_arr[0] if i == 1 else tmp_file1
        cmd = [
            FFMPEG_PATH,
            "-y",
            *quiet,
            "-i",
            prev_video,
            "-i",
            video_arr[i],
            "-filter_complex",
            "hstack,format=yuv420p",
            "-vcodec",
            "libx264",
            "-crf",
            f"{crf}",
            tmp_file2,
        ]
        subprocess.run(cmd)
        tmp_file1, tmp_file2 = tmp_file2, tmp_file1
    if text_arr is not None:
        font_file = font_files[platform.system()]
        draw_str = ",".join(
            [
                f"drawtext=fontsize={text_size}:fontfile={font_file}:fontcolor={text_color}:text='{x}':x={w}+10:y=20"
                for w, x in zip(start_w, text_arr)
            ]
        )
        cmd = [
            FFMPEG_PATH,
            "-y",
            *quiet,
            "-i",
            tmp_file1,
            "-vf",
            draw_str,
            "-c:a",
            "copy",
            out_path,
        ]
        subprocess.run(cmd)
    else:
        os.rename(tmp_file1, out_path)
    for tmp in (tmp_file1, tmp_file2):
        if os.path.exists(tmp):
            os.remove(tmp)
def make_checker_board_texture(
    color1="black", color2="white", width=10, height=10, n_tile=15, to_bgr=False
):
    """Build an (n_tile*height, n_tile*width, 3) uint8 checkerboard texture.

    A 2x2 cell of half-width x half-height blocks (color1/color2 alternating)
    is tiled n_tile times in each direction. `to_bgr` reverses the channel
    order of both colors.
    """
    rgb_a = np.asarray(ImageColor.getcolor(color1, "RGB")).astype(np.uint8)
    rgb_b = np.asarray(ImageColor.getcolor(color2, "RGB")).astype(np.uint8)
    if to_bgr:
        rgb_a = rgb_a[[2, 1, 0]]
        rgb_b = rgb_b[[2, 1, 0]]
    half_w, half_h = width // 2, height // 2
    block_a = np.tile(rgb_a, (half_h, half_w, 1))
    block_b = np.tile(rgb_b, (half_h, half_w, 1))
    top_row = np.concatenate([block_a, block_b], axis=1)
    bottom_row = np.concatenate([block_b, block_a], axis=1)
    cell = np.concatenate([top_row, bottom_row], axis=0)
    return np.tile(cell, (n_tile, n_tile, 1))
def resize_bbox(bbox, scale):
    """Scale bounding boxes about their centers.

    `bbox` is an array whose last axis is (x1, y1, x2, y2); the returned
    array has the same shape with each box's width and height multiplied
    by `scale` while its center stays fixed.
    """
    x_min, y_min = bbox[..., 0], bbox[..., 1]
    x_max, y_max = bbox[..., 2], bbox[..., 3]
    center_x = x_min + 0.5 * (x_max - x_min)
    center_y = y_min + 0.5 * (y_max - y_min)
    half_w = 0.5 * scale * (x_max - x_min)
    half_h = 0.5 * scale * (y_max - y_min)
    return np.stack(
        [center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h],
        axis=-1,
    )
def nparray_to_vtk_matrix(array):
"""Convert a numpy.ndarray to a vtk.vtkMatrix4x4"""
import vtk
matrix = vtk.vtkMatrix4x4()
for i in range(array.shape[0]):
for j in range(array.shape[1]):
matrix.SetElement(i, j, array[i, j])
return matrix
def vtk_matrix_to_nparray(matrix):
"""Convert a numpy.ndarray to a vtk.vtkMatrix4x4"""
array = np.zeros([4, 4])
for i in range(array.shape[0]):
for j in range(array.shape[1]):
array[i, j] = matrix.GetElement(i, j)
return array
def random_color(seed):
    """Deterministically pick a seaborn palette color for the given seed.

    Uses a private random.Random instance so the module-global RNG state is
    not reseeded as a side effect (the original called random.seed(), which
    silently perturbed every other user of the `random` module). A given
    seed still yields the same color as before.
    """
    rng = random.Random(seed)
    colors = sns.color_palette()
    return rng.choice(colors)
def draw_tracks(
    img, bbox, idx, score, thickness=2, font_scale=0.4, text_height=10, text_width=15
):
    """Draw a tracked bounding box with its id and score onto img (in place).

    The box color is derived deterministically from `idx`; the id is drawn on
    a filled strip just inside the top edge, the score just above it.
    (Adapted from mmtracking.)
    """
    left, top, right, bottom = bbox.astype(np.int32)

    # Deterministic per-track color; reversed channel order for cv drawing.
    track_color = random_color(idx)
    track_color = [int(255 * channel) for channel in track_color][::-1]
    cv.rectangle(img, (left, top), (right, bottom), track_color, thickness=thickness)

    # Track id on a filled background strip below the top edge.
    id_text = str(idx)
    id_strip_w = len(id_text) * text_width
    img[top : top + text_height, left : left + id_strip_w, :] = track_color
    cv.putText(
        img,
        id_text,
        (left, top + text_height - 2),
        cv.FONT_HERSHEY_COMPLEX,
        font_scale,
        color=(0, 0, 0),
    )

    # Detection score on a filled strip above the top edge.
    score_text = "{:.02f}".format(score)
    score_strip_w = len(score_text) * text_width
    img[top - text_height : top, left : left + score_strip_w, :] = track_color
    cv.putText(
        img,
        score_text,
        (left, top - 2),
        cv.FONT_HERSHEY_COMPLEX,
        font_scale,
        color=(0, 0, 0),
    )
    return img
def draw_keypoints(img, keypoints, confidence, size=4, color=(255, 0, 255)):
    """Draw a filled circle for every keypoint whose confidence exceeds 0.2."""
    for point, conf in zip(keypoints, confidence):
        if conf <= 0.2:
            continue
        center = np.round(point).astype(int).tolist()
        cv.circle(img, center, size, color=color, thickness=-1)
    return img