Spaces:

saptak21
/

Face_Direction_Detection

Sleeping

App Files Files Community

Face_Direction_Detection / gazelib /gaze /normalize.py

saptak21

Upload 10 files

78dd6e8 verified about 1 month ago

raw

history blame contribute delete

9.87 kB

	# -- coding: utf-8 --
	"""
	######################################################################################################################################
	This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy of this license,
	visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

	Any publications arising from the use of this software, including but
	not limited to academic journal and conference publications, technical
	reports and manuals, must cite at least one of the following works:

	Revisiting Data Normalization for Appearance-Based Gaze Estimation
	Xucong Zhang, Yusuke Sugano, Andreas Bulling
	in Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2018
	######################################################################################################################################
	"""

	import os
	import cv2
	import numpy as np
	import csv
	import argparse
	# import dlib
	import glob





	def normalize_woimg(landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
	    """Compute the data-normalization transform and warp 2D landmarks (no image).

	    A rotation ``R`` is built whose third row is the unit vector from the
	    camera origin towards ``center``; the other two rows are derived from
	    that direction and the first column of the head rotation matrix. ``R``
	    is combined with a z-scaling matrix and the normalized camera matrix
	    into the perspective transform ``W``.

	    Args:
	        landmarks: 2D points; reshaped to (-1, 1, 2) and cast to float32
	            before being warped with ``W``.
	        focal_norm: focal length of the normalized camera.
	        distance_norm: target distance; z is scaled by
	            ``distance_norm / ||center||``.
	        roi_size: two-element size; ``roi_size[0] / 2`` and
	            ``roi_size[1] / 2`` become the normalized principal point.
	        center: 3D reference point (3 values), reshaped to (3, 1).
	        hr: head rotation, converted with ``cv2.Rodrigues``.
	        ht: accepted for signature compatibility; not used here.
	        cam: camera matrix of the original camera (it is inverted).
	        gc: optional 3D gaze target (3 values).

	    Returns:
	        ``[None, R, hR_norm, gc_normalized, landmarks_warped, W]`` where the
	        first slot is the (absent) warped image, ``hR_norm`` is ``R @ hR``,
	        ``gc_normalized`` is the unit gaze direction rotated by ``R`` (or
	        ``None`` when ``gc`` is ``None``) and ``landmarks_warped`` has shape
	        ``(landmarks.shape[0], 2)``.
	    """
	    center = center.reshape(3, 1)
	    head_rot = cv2.Rodrigues(hr)[0]  # rotation matrix

	    # Distance between the reference point and the original camera.
	    eye_dist = np.linalg.norm(center)

	    # Intrinsics of the virtual (normalized) camera.
	    half_w, half_h = roi_size[0] / 2, roi_size[1] / 2
	    cam_norm = np.array([
	        [focal_norm, 0, half_w],
	        [0, focal_norm, half_h],
	        [0, 0, 1.0],
	    ])

	    # Scaling matrix: only depth is scaled.
	    scale = np.array([
	        [1.0, 0.0, 0.0],
	        [0.0, 1.0, 0.0],
	        [0.0, 0.0, distance_norm / eye_dist],
	    ])

	    # Orthonormal axes of the normalized camera.
	    z_axis = (center / eye_dist).reshape(3)
	    y_axis = np.cross(z_axis, head_rot[:, 0])
	    y_axis = y_axis / np.linalg.norm(y_axis)
	    x_axis = np.cross(y_axis, z_axis)
	    x_axis = x_axis / np.linalg.norm(x_axis)
	    R = np.c_[x_axis, y_axis, z_axis].T

	    # Perspective transform from original image to normalized image.
	    W = (cam_norm @ scale) @ (R @ np.linalg.inv(cam))

	    # Head rotation expressed in the normalized space.
	    hR_norm = R @ head_rot

	    points = landmarks.reshape(-1, 1, 2).astype(np.float32)
	    landmarks_warped = cv2.perspectiveTransform(points, W).reshape(landmarks.shape[0], 2)

	    gc_normalized = None
	    if gc is not None:
	        # Modified data normalization: only R (no scaling) is applied to
	        # the gaze direction before it is renormalized to unit length.
	        gaze = R @ (gc.reshape((3, 1)) - center)
	        gc_normalized = gaze / np.linalg.norm(gaze)

	    return [None, R, hR_norm, gc_normalized, landmarks_warped, W]


	def normalize(img, landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
	    """Normalize an image together with its landmarks, head pose and gaze.

	    The geometry is identical to ``normalize_woimg`` (this function used to
	    carry a verbatim copy of it), so the computation is delegated there and
	    only the image warp is added on top.

	    Args:
	        img: input image, warped with ``cv2.warpPerspective``.
	        landmarks: 2D points to warp with the same transform.
	        focal_norm: focal length of the normalized camera.
	        distance_norm: target distance of the normalized camera.
	        roi_size: output size passed to ``cv2.warpPerspective``; also used
	            for the normalized principal point.
	        center: 3D reference point (3 values).
	        hr: head rotation, converted with ``cv2.Rodrigues``.
	        ht: forwarded unchanged; not used by the computation.
	        cam: camera matrix of the original camera.
	        gc: optional 3D gaze target (3 values).

	    Returns:
	        ``[img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]``.
	    """
	    result = normalize_woimg(landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc)

	    # The last entry is the perspective transform W; the first entry is the
	    # placeholder for the warped image.
	    result[0] = cv2.warpPerspective(img, result[-1], roi_size)  # image normalization
	    return result

	def normalize_face(img, face, hr, ht, cam, gc=None, landmarks=None):
	    """Normalize a full-face image around the estimated face center.

	    The previous version called ``normalize`` without its ``landmarks``
	    argument, which shifted every following positional argument by one
	    slot. ``landmarks`` is now passed explicitly.

	    Args:
	        img: input image.
	        face: 3D face model points with one point per column (it is
	            left-multiplied by the 3x3 head rotation matrix).
	        hr: head rotation, converted with ``cv2.Rodrigues``.
	        ht: head translation (3 values), reshaped to (3, 1).
	        cam: camera matrix of the original camera.
	        gc: optional 3D gaze target.
	        landmarks: optional 2D landmarks to warp along with the image.
	            When omitted, the posed model points are projected through
	            ``cam`` and used instead.

	    Returns:
	        The list returned by ``normalize``.
	    """
	    ## normalized camera parameters
	    focal_norm = 960  # focal length of normalized camera
	    distance_norm = 600  # normalized distance between face and camera
	    roi_size = (224, 224)  # size of cropped face image

	    ## compute estimated 3D positions of the landmarks
	    ht = ht.reshape((3, 1))
	    hR = cv2.Rodrigues(hr)[0]  # rotation matrix
	    Fc = np.dot(hR, face) + ht  # 3D positions of facial landmarks

	    # Face center = mean of (mean of columns 0-3, mean of columns 4-5).
	    # NOTE(review): this code treats columns 0-3 as the eye points and
	    # columns 4-5 as the nose -- confirm against the face model file.
	    two_eye_center = np.mean(Fc[:, 0:4], axis=1).reshape((3, 1))
	    nose_center = np.mean(Fc[:, 4:6], axis=1).reshape((3, 1))
	    face_center = np.mean(
	        np.concatenate((two_eye_center, nose_center), axis=1), axis=1
	    ).reshape((3, 1))

	    if landmarks is None:
	        # Fallback: pinhole-project the posed model points to pixel
	        # coordinates, one (x, y) row per model point.
	        # NOTE(review): assumes ``cam`` matches ``img`` and that lens
	        # distortion has already been removed -- confirm; pass detected
	        # landmarks explicitly when they are available.
	        projected = np.dot(cam, Fc)
	        landmarks = (projected[:2] / projected[2]).T

	    return normalize(img, landmarks, focal_norm, distance_norm, roi_size, face_center, hr, ht, cam, gc)

	def normalize_eye(img, face, hr, ht, cam, gc=None, landmarks=None):
	    """Normalize the image once per eye, centered on each eye.

	    The previous version called ``normalize`` without its ``landmarks``
	    argument, which shifted every following positional argument by one
	    slot. ``landmarks`` is now passed explicitly.

	    Args:
	        img: input image.
	        face: 3D face model points with one point per column (it is
	            left-multiplied by the 3x3 head rotation matrix).
	        hr: head rotation, converted with ``cv2.Rodrigues``.
	        ht: head translation (3 values), reshaped to (3, 1).
	        cam: camera matrix of the original camera.
	        gc: optional 3D gaze target.
	        landmarks: optional 2D landmarks to warp along with the image.
	            When omitted, the posed model points are projected through
	            ``cam`` and used instead.

	    Returns:
	        A two-element list of ``normalize`` results: first the eye whose
	        center is the midpoint of model columns 0 and 1, then the eye whose
	        center is the midpoint of columns 2 and 3.
	    """
	    ## normalized camera parameters
	    focal_norm = 960  # focal length of normalized camera
	    distance_norm = 600  # normalized distance between eye and camera
	    roi_size = (60, 36)  # size of cropped eye image

	    ## compute estimated 3D positions of the landmarks
	    ht = ht.reshape((3, 1))
	    hR = cv2.Rodrigues(hr)[0]  # rotation matrix
	    Fc = np.dot(hR, face) + ht  # 3D positions of facial landmarks

	    # NOTE(review): the original variables were named ``re`` / ``le`` but
	    # their comments said "left" / "right" respectively; the names are
	    # followed here -- confirm against the face model's column order.
	    right_eye_center = 0.5 * (Fc[:, 0] + Fc[:, 1]).reshape((3, 1))
	    left_eye_center = 0.5 * (Fc[:, 2] + Fc[:, 3]).reshape((3, 1))

	    if landmarks is None:
	        # Fallback: pinhole-project the posed model points to pixel
	        # coordinates, one (x, y) row per model point.
	        # NOTE(review): assumes ``cam`` matches ``img`` and that lens
	        # distortion has already been removed -- confirm; pass detected
	        # landmarks explicitly when they are available.
	        projected = np.dot(cam, Fc)
	        landmarks = (projected[:2] / projected[2]).T

	    ## normalize each eye
	    return [
	        normalize(img, landmarks, focal_norm, distance_norm, roi_size, eye_center, hr, ht, cam, gc)
	        for eye_center in (right_eye_center, left_eye_center)
	    ]

	def load_calibration(calib_path):
	    """Load camera calibration data from an OpenCV ``FileStorage`` file.

	    The parameters are expected to have been obtained with OpenCV's camera
	    calibration functions.

	    Args:
	        calib_path: path of the calibration file.

	    Returns:
	        ``(camera_matrix, camera_distortion)``, read from the
	        ``camera_matrix`` and ``dist_coeffs`` nodes respectively.
	    """
	    fs = cv2.FileStorage(calib_path, cv2.FILE_STORAGE_READ)
	    try:
	        camera_matrix = fs.getNode('camera_matrix').mat()
	        camera_distortion = fs.getNode('dist_coeffs').mat()
	    finally:
	        # Close the underlying file even if reading a node fails.
	        fs.release()
	    return camera_matrix, camera_distortion

	def load_facemodel(model_path):
	    """Load the generic 3D face model from an OpenCV ``FileStorage`` file.

	    The model includes 6 facial landmarks: four eye corners and two mouth
	    corners.

	    Args:
	        model_path: path of the face model file.

	    Returns:
	        The matrix stored under the ``face_model`` node.
	    """
	    fs = cv2.FileStorage(model_path, cv2.FILE_STORAGE_READ)
	    try:
	        face_model = fs.getNode('face_model').mat()
	    finally:
	        # Close the underlying file even if reading the node fails.
	        fs.release()
	    return face_model

	def read_image(img_path, camera_matrix, camera_distortion):
	    """Load an image from disk and remove lens distortion.

	    Args:
	        img_path: path of the image file.
	        camera_matrix: camera matrix passed to ``cv2.undistort``.
	        camera_distortion: distortion coefficients passed to
	            ``cv2.undistort``.

	    Returns:
	        The undistorted image.

	    Raises:
	        IOError: if the image cannot be read.
	    """
	    img_original = cv2.imread(img_path)
	    if img_original is None:
	        # cv2.imread signals failure by returning None instead of raising;
	        # fail here with the path rather than inside cv2.undistort.
	        raise IOError('Could not read image: {}'.format(img_path))

	    return cv2.undistort(img_original, camera_matrix, camera_distortion)

	def estimateHeadPose(landmarks, face_model, camera, distortion, iterate=True):
	    """Estimate the head pose from 2D landmarks and the 3D face model.

	    Args:
	        landmarks: 2D image points (passed to ``cv2.solvePnP`` as the
	            image points).
	        face_model: 3D model points (passed as the object points).
	        camera: camera matrix.
	        distortion: distortion coefficients.
	        iterate: when true, refine the EPnP estimate with a second
	            ``cv2.solvePnP`` call seeded with it.

	    Returns:
	        ``(rvec, tvec)`` as returned by ``cv2.solvePnP``; the solver's
	        success flag is not checked.
	    """
	    # Initial estimate with the EPnP solver.
	    _, rvec, tvec = cv2.solvePnP(
	        face_model, landmarks, camera, distortion, flags=cv2.SOLVEPNP_EPNP
	    )
	    if not iterate:
	        return rvec, tvec

	    # Further optimize, using the first estimate as the extrinsic guess.
	    _, rvec, tvec = cv2.solvePnP(
	        face_model, landmarks, camera, distortion, rvec, tvec, useExtrinsicGuess=True
	    )
	    return rvec, tvec

	def detect_landmark(img, detector_path, predictor_path):
	    """Detect a face with dlib and return six facial keypoints.

	    Args:
	        img: input image.
	        detector_path: path of the dlib CNN face detection model.
	        predictor_path: path of the dlib shape predictor model.

	    Returns:
	        A (6, 2) array of ``(x, y)`` positions for shape parts 36, 39, 42,
	        45, 48 and 54 of the first detection, or ``None`` when no face is
	        detected.

	    Raises:
	        ImportError: if dlib is not installed.
	    """
	    # Imported here because the module-level ``import dlib`` is commented
	    # out; without it this function fails with NameError.
	    import dlib

	    ## obtain facial landmarks using dlib
	    detector = dlib.cnn_face_detection_model_v1(detector_path)
	    dets = detector(img, 0)

	    if len(dets) == 0:
	        return None

	    # Only the first detection is used.
	    predictor = dlib.shape_predictor(predictor_path)
	    shape = predictor(img, dets[0].rect)

	    ## extract required keypoints
	    keypoint_ids = (36, 39, 42, 45, 48, 54)
	    return np.array([[shape.part(i).x, shape.part(i).y] for i in keypoint_ids])


	def read_landmark(img_path):
	    """Read the six annotated 2D landmarks for an image.

	    ``img_path`` is expected to end in ``<person>/<day>/<image file>``. The
	    annotations are read from ``<person>/<person>.txt``, where each line
	    starts with the key ``<day>/<image file>`` (joined with the OS path
	    separator) followed by whitespace-separated values. Only the text
	    before the first comma of a line is considered.

	    Blank or whitespace-only lines are skipped; they previously raised
	    ``IndexError``.

	    Args:
	        img_path: path of the image whose landmarks are wanted.

	    Returns:
	        A (6, 2) integer array built from values 2..13 (zero-based) that
	        follow the key on the matching line.

	    Raises:
	        KeyError: if the annotation file has no entry for the image.
	    """
	    parts = img_path.split(os.path.sep)
	    img_file, day, person = parts[-1], parts[-2], parts[-3]
	    person_path = os.path.split(os.path.split(img_path)[0])[0]

	    person_txt = os.path.join(person_path, person+'.txt')
	    index = os.path.join(day, img_file)
	    print(person_txt)
	    print(index)

	    annotations = {}
	    # newline='' is what the csv module asks for when given a file object.
	    with open(person_txt, newline='') as f:
	        for row in csv.reader(f):
	            if not row:
	                continue  # blank line
	            words = row[0].split()
	            if not words:
	                continue  # whitespace-only line
	            # Later duplicates of a key overwrite earlier ones.
	            annotations[words[0]] = words[1:]

	    return np.array([int(v) for v in annotations[index][2:14]]).reshape((6, 2))

	# def process_image(img_path, detector_path, predictor_path, camera_matrix, camera_distortion, face_model, gc=None):
	# # read input image
	# img = read_image(img_path, camera_matrix, camera_distortion)

	# # detect facial landmarks
	# landmarks = detect_landmark(img, detector_path, predictor_path)

	# if landmarks is not None:
	# # estimate head pose
	# hr, ht = estimateHeadPose(landmarks, face_model, camera_matrix, camera_distortion)

	# # data normalization for left and right eye image
	# normalized_eyes = normalize_eye(img, face_model, hr, ht, camera_matrix, gc)

	# # data normalization for full face
	# normalized_face = normalize_face(img, face_model, hr, ht, camera_matrix, gc)

	# # return a list of [reye, leye, face]
	# return normalized_eyes + [normalized_face]