Spaces:

saptak21
/

Face_Direction_Detection

Sleeping

File size: 9,867 Bytes

78dd6e8

# -*- coding: utf-8 -*-
"""
######################################################################################################################################
This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy of this license,
visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

Any publications arising from the use of this software, including but
not limited to academic journal and conference publications, technical
reports and manuals, must cite at least one of the following works:

Revisiting Data Normalization for Appearance-Based Gaze Estimation
Xucong Zhang, Yusuke Sugano, Andreas Bulling
in Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2018
######################################################################################################################################
"""

import os
import cv2
import numpy as np
import csv
import argparse
# import dlib
import glob


	

	
def normalize_woimg(landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
	"""Compute the data-normalization transform and warp 2D landmarks (no image warp).

	A virtual "normalized" camera is built that looks straight at ``center``
	from a distance of ``distance_norm``. Its z-axis is the direction from the
	camera origin to ``center``; its x-axis is derived from the first column
	of the head rotation matrix so that it stays perpendicular to the z-axis.

	Args:
		landmarks: array of 2D points; its first dimension is taken as the
			number of points and it is reshaped to (-1, 1, 2) for warping.
		focal_norm: focal length of the normalized camera.
		distance_norm: distance between ``center`` and the normalized camera.
		roi_size: indexable (width, height); half of each is used as the
			principal point of the normalized camera.
		center: 3-element array, the 3D reference point (reshaped to (3, 1)).
		hr: head rotation, converted with ``cv2.Rodrigues``.
		ht: not used by this function; kept so the signature parallels
			``normalize``.
		cam: camera matrix of the original camera (it is inverted here).
		gc: optional 3-element gaze target; when given, the gaze direction
			from ``center`` to ``gc`` is rotated into the normalized space.

	Returns:
		list ``[None, R, hR_norm, gc_normalized, landmarks_warped, W]`` where
		the leading ``None`` stands in for the warped image that
		``normalize`` would return, ``R`` is the normalizing rotation,
		``hR_norm`` the head rotation matrix in normalized space,
		``gc_normalized`` the unit gaze vector (or ``None``),
		``landmarks_warped`` an (N, 2) array and ``W`` the 3x3 perspective
		transformation.
	"""
	center = center.reshape(3, 1)
	head_rot = cv2.Rodrigues(hr)[0]  # head rotation as a matrix

	## ---------- normalized camera and depth scaling ----------
	dist_to_center = np.linalg.norm(center)  # actual distance between center and original camera
	depth_scale = distance_norm / dist_to_center
	half_w, half_h = roi_size[0] / 2, roi_size[1] / 2
	cam_norm = np.array([
		[focal_norm, 0, half_w],
		[0, focal_norm, half_h],
		[0, 0, 1.0],
	])
	scale_mat = np.diag([1.0, 1.0, depth_scale])  # scales z only

	## ---------- axes of the normalized camera ----------
	z_axis = (center / dist_to_center).reshape(3)
	y_axis = np.cross(z_axis, head_rot[:, 0])
	y_axis = y_axis / np.linalg.norm(y_axis)
	x_axis = np.cross(y_axis, z_axis)
	x_axis = x_axis / np.linalg.norm(x_axis)
	R = np.column_stack((x_axis, y_axis, z_axis)).T  # rows are the new axes

	## ---------- perspective transformation ----------
	to_norm_cam = np.dot(cam_norm, scale_mat)
	from_pixels = np.dot(R, np.linalg.inv(cam))
	W = np.dot(to_norm_cam, from_pixels)

	## ---------- normalize rotation ----------
	hR_norm = np.dot(R, head_rot)  # rotation matrix in normalized space

	## ---------- warp landmarks ----------
	num_point = landmarks.shape[0]
	points = landmarks.reshape(-1, 1, 2).astype('float32')
	landmarks_warped = cv2.perspectiveTransform(points, W).reshape(num_point, 2)

	## ---------- normalize gaze vector ----------
	gc_normalized = None
	if gc is not None:
		# Modified data normalization: only the rotation R is applied to the
		# gaze direction, the scaling matrix is deliberately left out.
		gaze = np.dot(R, gc.reshape((3, 1)) - center)
		gc_normalized = gaze / np.linalg.norm(gaze)

	return [None, R, hR_norm, gc_normalized, landmarks_warped, W]

	
def normalize(img, landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
	"""Warp an image and its 2D landmarks into the normalized camera space.

	A virtual "normalized" camera is built that looks straight at ``center``
	from a distance of ``distance_norm``. Its z-axis is the direction from the
	camera origin to ``center``; its x-axis is derived from the first column
	of the head rotation matrix so that it stays perpendicular to the z-axis.

	Args:
		img: input image; passed directly to ``cv2.warpPerspective`` (no
			``None`` check is performed).
		landmarks: array of 2D points; its first dimension is taken as the
			number of points and it is reshaped to (-1, 1, 2) for warping.
		focal_norm: focal length of the normalized camera.
		distance_norm: distance between ``center`` and the normalized camera.
		roi_size: (width, height) of the output image; half of each is used
			as the principal point of the normalized camera.
		center: 3-element array, the 3D reference point (reshaped to (3, 1)).
		hr: head rotation, converted with ``cv2.Rodrigues``.
		ht: not used by this function.
		cam: camera matrix of the original camera (it is inverted here).
		gc: optional 3-element gaze target; when given, the gaze direction
			from ``center`` to ``gc`` is rotated into the normalized space.

	Returns:
		list ``[img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]``
		where ``R`` is the normalizing rotation, ``hR_norm`` the head rotation
		matrix in normalized space, ``gc_normalized`` the unit gaze vector (or
		``None``), ``landmarks_warped`` an (N, 2) array and ``W`` the 3x3
		perspective transformation.
	"""
	center = center.reshape(3, 1)
	head_rot = cv2.Rodrigues(hr)[0]  # head rotation as a matrix

	## ---------- normalized camera and depth scaling ----------
	dist_to_center = np.linalg.norm(center)  # actual distance between center and original camera
	depth_scale = distance_norm / dist_to_center
	half_w, half_h = roi_size[0] / 2, roi_size[1] / 2
	cam_norm = np.array([
		[focal_norm, 0, half_w],
		[0, focal_norm, half_h],
		[0, 0, 1.0],
	])
	scale_mat = np.diag([1.0, 1.0, depth_scale])  # scales z only

	## ---------- axes of the normalized camera ----------
	z_axis = (center / dist_to_center).reshape(3)
	y_axis = np.cross(z_axis, head_rot[:, 0])
	y_axis = y_axis / np.linalg.norm(y_axis)
	x_axis = np.cross(y_axis, z_axis)
	x_axis = x_axis / np.linalg.norm(x_axis)
	R = np.column_stack((x_axis, y_axis, z_axis)).T  # rows are the new axes

	## ---------- perspective transformation ----------
	to_norm_cam = np.dot(cam_norm, scale_mat)
	from_pixels = np.dot(R, np.linalg.inv(cam))
	W = np.dot(to_norm_cam, from_pixels)

	## ---------- normalize image ----------
	img_warped = cv2.warpPerspective(img, W, roi_size)

	## ---------- normalize rotation ----------
	hR_norm = np.dot(R, head_rot)  # rotation matrix in normalized space

	## ---------- warp landmarks ----------
	num_point = landmarks.shape[0]
	points = landmarks.reshape(-1, 1, 2).astype('float32')
	landmarks_warped = cv2.perspectiveTransform(points, W).reshape(num_point, 2)

	## ---------- normalize gaze vector ----------
	gc_normalized = None
	if gc is not None:
		# Modified data normalization: only the rotation R is applied to the
		# gaze direction, the scaling matrix is deliberately left out.
		gaze = np.dot(R, gc.reshape((3, 1)) - center)
		gc_normalized = gaze / np.linalg.norm(gaze)

	return [img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]

def normalize_face(img, face, hr, ht, cam, gc=None, landmarks=None):
	"""Normalize a full-face image around the face center.

	The face center is the mean of two points: the mean of model landmarks
	0-3 (the eye corners) and the mean of model landmarks 4-5, all expressed
	in camera coordinates.

	Args:
		img: input image, forwarded to ``normalize``.
		face: 3xN array of 3D face-model points (at least 6 columns are
			indexed here).
		hr: head rotation, converted with ``cv2.Rodrigues``.
		ht: 3-element head translation.
		cam: camera matrix of the original camera.
		gc: optional 3D gaze target, forwarded to ``normalize``.
		landmarks: optional (N, 2) array of 2D landmarks to warp together
			with the image. When omitted, the 3D model points are projected
			through ``cam`` and used instead -- this assumes ``img`` has
			already been undistorted and that all points have non-zero
			depth.

	Returns:
		The list returned by ``normalize``:
		``[img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]``.
	"""
	## normalized camera parameters
	focal_norm = 960 # focal length of normalized camera
	distance_norm = 600 # normalized distance between face and camera
	roi_size = (224, 224) # size of cropped face image

	## compute estimated 3D positions of the landmarks
	ht = ht.reshape((3,1))
	hR = cv2.Rodrigues(hr)[0] # rotation matrix
	Fc = np.dot(hR, face) + ht # 3D positions of facial landmarks
	two_eye_center = np.mean(Fc[:, 0:4], axis=1).reshape((3, 1))
	# Columns 4:6 are the last two model points; the variable keeps the
	# historical name "nose_center".
	nose_center = np.mean(Fc[:, 4:6], axis=1).reshape((3, 1))
	# get the face center
	face_center = np.mean(np.concatenate((two_eye_center, nose_center), axis=1), axis=1).reshape((3, 1))

	if landmarks is None:
		# normalize() requires 2D landmarks; derive them by projecting the
		# posed 3D model points with the camera matrix.
		projected = np.dot(cam, Fc)
		landmarks = (projected[:2] / projected[2]).T

	# normalize() takes `landmarks` as its second positional argument;
	# omitting it shifts every following argument by one.
	return normalize(img, landmarks, focal_norm, distance_norm, roi_size, face_center, hr, ht, cam, gc)

def normalize_eye(img, face, hr, ht, cam, gc=None, landmarks=None):
	"""Normalize the two eye regions of an image.

	Each eye center is the midpoint of two model landmarks in camera
	coordinates: landmarks 0 and 1 for the first eye (``re``), landmarks 2 and
	3 for the second eye (``le``).

	Args:
		img: input image, forwarded to ``normalize``.
		face: 3xN array of 3D face-model points (at least 4 columns are
			indexed here).
		hr: head rotation, converted with ``cv2.Rodrigues``.
		ht: 3-element head translation.
		cam: camera matrix of the original camera.
		gc: optional 3D gaze target, forwarded to ``normalize``.
		landmarks: optional (N, 2) array of 2D landmarks to warp together
			with each eye image. When omitted, the 3D model points are
			projected through ``cam`` and used instead -- this assumes
			``img`` has already been undistorted and that all points have
			non-zero depth.

	Returns:
		A two-element list ``[right_eye, left_eye]``; each element is the
		list returned by ``normalize``:
		``[img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]``.
	"""
	## normalized camera parameters
	focal_norm = 960 # focal length of normalized camera
	distance_norm = 600 # normalized distance between eye and camera
	roi_size = (60, 36) # size of cropped eye image

	## compute estimated 3D positions of the landmarks
	ht = ht.reshape((3,1))
	hR = cv2.Rodrigues(hr)[0] # rotation matrix
	Fc = np.dot(hR, face) + ht # 3D positions of facial landmarks
	re = 0.5*(Fc[:,0] + Fc[:,1]).reshape((3,1)) # center of right eye
	le = 0.5*(Fc[:,2] + Fc[:,3]).reshape((3,1)) # center of left eye

	if landmarks is None:
		# normalize() requires 2D landmarks; derive them by projecting the
		# posed 3D model points with the camera matrix.
		projected = np.dot(cam, Fc)
		landmarks = (projected[:2] / projected[2]).T

	## normalize each eye
	# normalize() takes `landmarks` as its second positional argument;
	# omitting it shifts every following argument by one.
	data = [
		normalize(img, landmarks, focal_norm, distance_norm, roi_size, eye_center, hr, ht, cam, gc)
		for eye_center in (re, le)
	]
	return data

def load_calibration(calib_path):
	"""Load camera calibration data from an OpenCV FileStorage file.

	The file is expected to hold the parameters produced by OpenCV's camera
	calibration functions under the keys ``camera_matrix`` and ``dist_coeffs``.

	Args:
		calib_path: path of the calibration file.

	Returns:
		Tuple ``(camera_matrix, camera_distortion)`` as read from the two
		nodes.
	"""
	fs = cv2.FileStorage(calib_path, cv2.FILE_STORAGE_READ)
	try:
		camera_matrix = fs.getNode('camera_matrix').mat()
		camera_distortion = fs.getNode('dist_coeffs').mat()
	finally:
		# Close the underlying file deterministically, even if reading fails.
		fs.release()
	return camera_matrix, camera_distortion

def load_facemodel(model_path):
	"""Load the generic 3D face model from an OpenCV FileStorage file.

	The model includes 6 facial landmarks: four eye corners and two mouth
	corners, stored under the key ``face_model``.

	Args:
		model_path: path of the face-model file.

	Returns:
		The matrix read from the ``face_model`` node.
	"""
	fs = cv2.FileStorage(model_path, cv2.FILE_STORAGE_READ)
	try:
		face_model = fs.getNode('face_model').mat()
	finally:
		# Close the underlying file deterministically, even if reading fails.
		fs.release()
	return face_model

def read_image(img_path, camera_matrix, camera_distortion):
	"""Read an image from disk and return its undistorted version.

	Args:
		img_path: path of the image, read with ``cv2.imread``.
		camera_matrix: camera matrix passed to ``cv2.undistort``.
		camera_distortion: distortion coefficients passed to ``cv2.undistort``.

	Returns:
		The undistorted image.
	"""
	raw = cv2.imread(img_path)
	return cv2.undistort(raw, camera_matrix, camera_distortion)

def estimateHeadPose(landmarks, face_model, camera, distortion, iterate=True):
	"""Estimate the head pose from 2D landmarks and the 3D face model.

	An initial pose is obtained with the EPnP solver. When ``iterate`` is
	true, ``cv2.solvePnP`` is called a second time with that pose passed in as
	the starting rotation/translation (extrinsic guess enabled, default
	solver flags).

	Args:
		landmarks: 2D image points.
		face_model: 3D model points matching ``landmarks``.
		camera: camera matrix.
		distortion: distortion coefficients.
		iterate: whether to run the second, refining solve.

	Returns:
		Tuple ``(rvec, tvec)``: rotation vector and translation vector.
	"""
	_, rvec, tvec = cv2.solvePnP(face_model, landmarks, camera, distortion, flags=cv2.SOLVEPNP_EPNP)
	if not iterate:
		return rvec, tvec

	## further optimize, starting from the EPnP result
	_, rvec, tvec = cv2.solvePnP(face_model, landmarks, camera, distortion, rvec, tvec, True)
	return rvec, tvec

def detect_landmark(img, detector_path, predictor_path):
	"""Detect a face with dlib and return six 2D facial keypoints.

	Only the first detection is used. The detector and the shape predictor
	are loaded from disk on every call.

	Args:
		img: image handed to the dlib detector and shape predictor.
		detector_path: path of the dlib CNN face-detection model.
		predictor_path: path of the dlib shape-predictor model.

	Returns:
		A (6, 2) array of ``[x, y]`` pixel coordinates for shape parts
		36, 39, 42, 45, 48 and 54 (in that order), or ``None`` when no face
		is detected.

	Raises:
		ImportError: if dlib is not installed.
	"""
	# Imported here because the module-level `import dlib` is commented out;
	# this keeps the module importable without dlib while making this
	# function usable when dlib is available.
	import dlib

	## obtain facial landmarks using dlib
	detector = dlib.cnn_face_detection_model_v1(detector_path)
	dets = detector(img, 0)

	if len(dets) == 0:
		return None

	predictor = dlib.shape_predictor(predictor_path)
	shape = predictor(img, dets[0].rect)

	## extract required keypoints
	keypoint_ids = (36, 39, 42, 45, 48, 54)
	landmarks = np.array([
		[shape.part(idx).x, shape.part(idx).y]
		for idx in keypoint_ids
	])

	return landmarks


def read_landmark(img_path):
	"""Read the six annotated 2D landmarks for an image from its person file.

	``img_path`` is expected to end in ``<person>/<day>/<image file>``. The
	annotations are read from ``<person>/<person>.txt``; each line is split
	on whitespace (only the text before the first comma is considered), the
	first token being the key ``<day>/<image file>`` joined with the OS path
	separator. Blank or whitespace-only lines are skipped. If a key occurs
	more than once, the last occurrence wins.

	Args:
		img_path: path of the image, using ``os.path.sep`` as separator.

	Returns:
		A (6, 2) integer array built from the 3rd to 14th values that follow
		the key on the matching line.

	Raises:
		KeyError: if the annotation file has no entry for the image.
	"""
	parts = img_path.split(os.path.sep)
	img_file, day, person = parts[-1], parts[-2], parts[-3]
	person_path = os.path.dirname(os.path.dirname(img_path))

	person_txt = os.path.join(person_path, person+'.txt')
	index = os.path.join(day,img_file)
	print(person_txt)
	print(index)

	p = {}
	with open(person_txt) as f:
		for row in csv.reader(f):
			words = row[0].split() if row else []
			if not words:
				# Blank / whitespace-only line: nothing to index.
				continue
			p[words[0]] = words[1:]
	landmarks = np.array([int(i) for i in p[index][2:14]]).reshape((6,2))
	return landmarks

# def process_image(img_path, detector_path, predictor_path, camera_matrix, camera_distortion, face_model, gc=None):
#     # read input image
#     img = read_image(img_path, camera_matrix, camera_distortion)

#     # detect facial landmarks
#     landmarks = detect_landmark(img, detector_path, predictor_path)

#     if landmarks is not None:
#         # estimate head pose
#         hr, ht = estimateHeadPose(face_model, landmarks, camera_matrix, camera_distortion)

#         # data normalization for left and right eye image
#         normalized_eyes = normalize_eye(img, face_model, hr, ht, camera_matrix, gc)

#         # data normalization for full face
#         normalized_face = normalize_face(img, face_model, hr, ht, camera_matrix, gc)

#         # return a list of [reye, leye, face]
#         return normalized_eyes + [normalized_face]