torchnet / Extractor.py

push to main

df07554 about 2 years ago

5.16 kB

	from typing import List

	import torch
	import os
	import numpy as np
	import cv2
	import face_alignment
	import subprocess

	from helpers import *


	def get_position(size, padding=0.25):
	    """Return the reference landmark template scaled to a ``size`` canvas.

	    A fixed table of 51 normalised (x, y) coordinates is shifted by
	    ``padding``, divided by ``2 * padding + 1`` and multiplied by ``size``.
	    NOTE(review): the table is presumably a frontal-face template for
	    landmarks 17..67 of a 68-point layout, since ``extract_frames`` pairs it
	    with ``shape[17:]`` -- confirm against the landmark detector in use.

	    Args:
	        size: Factor the padded, renormalised coordinates are multiplied by.
	        padding: Margin added to every coordinate before renormalising.

	    Returns:
	        ``np.ndarray`` of shape ``(51, 2)``; column 0 holds x, column 1 y.
	    """
	    template_x = np.array([
	        0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124,
	        0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036,
	        0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918,
	        0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149,
	        0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721,
	        0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874,
	        0.553364, 0.490127, 0.42689,
	    ])
	    template_y = np.array([
	        0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
	        0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326,
	        0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733,
	        0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099,
	        0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805,
	        0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746,
	        0.784792, 0.824182, 0.831803, 0.824182,
	    ])

	    # Adding the margin on both sides widens the unit square to
	    # (1 + 2 * padding); dividing by that keeps the result inside [0, 1]
	    # before it is stretched to the requested canvas size.
	    padded_extent = 2 * padding + 1
	    scaled_x = (template_x + padding) / padded_extent * size
	    scaled_y = (template_y + padding) / padded_extent * size
	    return np.column_stack((scaled_x, scaled_y))


	def cal_area(anno):
	    """Return the area of the axis-aligned bounding box around ``anno``.

	    Args:
	        anno: 2-D array indexed as ``anno[:, 0]`` and ``anno[:, 1]``; the
	            two columns are the coordinates whose extents are multiplied.

	    Returns:
	        Product of the column-0 extent and the column-1 extent.
	    """
	    first_axis = anno[:, 0]
	    second_axis = anno[:, 1]
	    extent_first = first_axis.max() - first_axis.min()
	    extent_second = second_axis.max() - second_axis.min()
	    return extent_first * extent_second


	def output_video(p, txt, dst):
	    """Caption the numbered frames in ``p`` and encode them into ``dst``.

	    Every file in ``p`` must be named ``<integer>.<ext>``; files are
	    processed in ascending integer order and paired with the entries of
	    ``txt``. Each paired frame gets its caption drawn, is halved in size
	    and is written back over the original file. ffmpeg is then run on
	    ``<p>/%d.jpg`` with an output rate of 25.

	    Args:
	        p: Directory holding the numbered frame images (modified in place).
	        txt: Iterable of caption strings, one per frame; pairing stops at
	            the shorter of ``txt`` and the file list.
	        dst: Path of the video file ffmpeg should write (overwritten, ``-y``).

	    Raises:
	        ValueError: If a file name in ``p`` has a non-integer stem.
	        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
	    """
	    files = sorted(
	        os.listdir(p),
	        key=lambda name: int(os.path.splitext(name)[0]),
	    )

	    font = cv2.FONT_HERSHEY_SIMPLEX

	    for file, line in zip(files, txt):
	        frame_path = os.path.join(p, file)
	        img = cv2.imread(frame_path)
	        h, w, _ = img.shape
	        origin = (w // 8, 11 * h // 12)
	        # Two passes at the same origin: a thick black stroke first, then
	        # white on top, so the caption is outlined against any background.
	        img = cv2.putText(img, line, origin, font, 1.2, (0, 0, 0), 3, cv2.LINE_AA)
	        img = cv2.putText(img, line, origin, font, 1.2, (255, 255, 255), 0, cv2.LINE_AA)
	        img = cv2.resize(img, (w // 2, h // 2))
	        cv2.imwrite(frame_path, img)

	    # Pass an argument list instead of a shell string so that spaces, quotes
	    # or shell metacharacters in ``p`` / ``dst`` cannot break or hijack the
	    # command. check=False keeps the original behaviour of not raising when
	    # ffmpeg exits with a non-zero status.
	    cmd = ["ffmpeg", "-y", "-i", "{}/%d.jpg".format(p), "-r", "25", dst]
	    subprocess.run(cmd, check=False)


	def transformation_from_points(points1, points2):
	    """Estimate the similarity transform that maps ``points1`` onto ``points2``.

	    Both point sets are centred and divided by their overall standard
	    deviation; the rotation is then taken from the SVD of the normalised
	    cross-product matrix, and scale and translation are restored from the
	    saved statistics.

	    The computation uses explicit matrix products on plain arrays, so the
	    inputs may be ``np.matrix`` objects, ``np.ndarray`` objects or nested
	    sequences. The inputs are not modified.

	    NOTE(review): there is no determinant check on the rotation, so a
	    reflection can be returned for mirrored or degenerate inputs. If all
	    points in a set coincide, its standard deviation is zero and the result
	    contains non-finite values.

	    Args:
	        points1: ``(N, 2)`` source points.
	        points2: ``(N, 2)`` target points, row-aligned with ``points1``.

	    Returns:
	        ``3 x 3`` ``np.matrix`` ``[[s*R, t], [0, 0, 1]]``; the first two
	        rows form a ``2 x 3`` affine matrix.
	    """
	    # np.array(...) copies and drops the np.matrix subclass, so ``@`` below
	    # is always a true matrix product and the caller's data stays untouched.
	    src = np.array(points1, dtype=np.float64)
	    dst = np.array(points2, dtype=np.float64)

	    src_centroid = src.mean(axis=0)
	    dst_centroid = dst.mean(axis=0)
	    src -= src_centroid
	    dst -= dst_centroid

	    src_spread = src.std()
	    dst_spread = dst.std()
	    src /= src_spread
	    dst /= dst_spread

	    # 2 x 2 correlation between the normalised sets; its singular vectors
	    # give the rotation that best aligns them.
	    U, _, Vt = np.linalg.svd(src.T @ dst)
	    rotation = (U @ Vt).T

	    linear = (dst_spread / src_spread) * rotation
	    translation = dst_centroid - linear @ src_centroid

	    affine = np.hstack([linear, translation[:, np.newaxis]])
	    # Return np.matrix to stay compatible with callers of the original API.
	    return np.matrix(np.vstack([affine, [0., 0., 1.]]))


	def load_video(path: str) -> List[np.ndarray]:
	    """
	    Read every frame of the video at ``path`` into memory, in order.

	    Reading stops at the first unsuccessful ``read()``; if the capture
	    cannot be opened at all, an empty list is returned.

	    Adapted from the original loading code using this OpenCV tutorial:
	    https://learnopencv.com/read-write-and-display-a-video-using-opencv-cpp-python/
	    """
	    capture = cv2.VideoCapture(path)
	    decoded = []

	    while capture.isOpened():
	        ok, image = capture.read()
	        if ok is not True:
	            # No frame delivered: treat as end of stream.
	            break
	        decoded.append(image)

	    capture.release()
	    return decoded


	def extract_frames(
	    video_filepath, recycle_landmarks=False,
	    use_gpu=False
	):
	    """Align each video frame to the landmark template and crop a fixed region.

	    Frames are loaded with ``load_video``, landmarks are detected with
	    ``face_alignment``, and each frame is warped onto the 256 x 256 template
	    from ``get_position(256)`` before a 160 x 80 window is cut out and
	    resized to 128 x 64.

	    Args:
	        video_filepath: Path of the video to process.
	        recycle_landmarks: When true, a frame without a detection reuses the
	            most recent successful detection, if there is one.
	        use_gpu: Run the landmark detector on ``'cuda'`` instead of ``'cpu'``.

	    Returns:
	        List with one entry per decoded frame: the cropped image, or ``None``
	        where no landmarks were available for that frame.
	    """
	    if use_gpu:
	        device = 'cuda'
	    else:
	        device = 'cpu'

	    aligner = face_alignment.FaceAlignment(
	        face_alignment.LandmarksType.TWO_D,
	        flip_input=False, device=device
	    )

	    scenes = [im for im in load_video(video_filepath) if im is not None]
	    # NOTE: an optional downscale of every frame to (100, 50) with
	    # cv2.INTER_LANCZOS4 was left disabled at this point in the original.

	    detections = [aligner.get_landmarks(scene) for scene in scenes]
	    template = get_position(256)

	    # The crop window is centred on the mean of the last 20 template points
	    # (presumably the mouth landmarks -- TODO confirm). It depends only on
	    # the template, so it is computed once, not per frame.
	    (centre_x, centre_y) = template[-20:].mean(0).astype(np.int32)
	    half_width = 160 // 2
	    half_height = half_width // 2

	    last_detection = None
	    crops = []

	    for detection, scene in zip(detections, scenes):
	        if detection is None:
	            can_recycle = recycle_landmarks and (last_detection is not None)
	            if not can_recycle:
	                crops.append(None)
	                continue
	            detection = last_detection
	        else:
	            last_detection = detection

	        # Only the first entry of the detector output is used, and its first
	        # 17 points are dropped to match the 51-point template.
	        landmarks = np.array(detection[0])[17:]
	        M = transformation_from_points(
	            np.matrix(landmarks), np.matrix(template)
	        )

	        aligned = cv2.warpAffine(scene, M[:2], (256, 256))
	        region = aligned[
	            centre_y - half_height:centre_y + half_height,
	            centre_x - half_width:centre_x + half_width,
	            ...
	        ]
	        crops.append(cv2.resize(region, (128, 64)))

	    return crops


	def export_frames(
	    video_filepath, export_images_dir,
	    recycle_landmarks=False, use_gpu=False,
	    **kwargs
	):
	    """Extract aligned crops from a video and write them as numbered JPEGs.

	    Each crop returned by ``extract_frames`` is saved as ``<index>.jpg`` in
	    ``export_images_dir``, where ``index`` is its position in the returned
	    list. Positions without a crop are skipped, leaving gaps in the
	    numbering.

	    Args:
	        video_filepath: Path of the video to process.
	        export_images_dir: Directory the JPEG files are written into.
	        recycle_landmarks: Forwarded to ``extract_frames``.
	        use_gpu: Forwarded to ``extract_frames``.
	        **kwargs: Accepted and ignored.

	    Returns:
	        ``True`` if at least one frame produced no crop, else ``False``.
	    """
	    crops = extract_frames(
	        video_filepath, recycle_landmarks=recycle_landmarks,
	        use_gpu=use_gpu
	    )

	    any_missing = False
	    for index, crop in enumerate(crops):
	        if crop is not None:
	            target = os.path.join(export_images_dir, f'{index}.jpg')
	            cv2.imwrite(target, crop)
	        else:
	            any_missing = True

	    return any_missing