saptak21's picture
Upload 10 files
78dd6e8 verified
# -*- coding: utf-8 -*-
"""
######################################################################################################################################
This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy of this license,
visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
Any publications arising from the use of this software, including but
not limited to academic journal and conference publications, technical
reports and manuals, must cite at least one of the following works:
Revisiting Data Normalization for Appearance-Based Gaze Estimation
Xucong Zhang, Yusuke Sugano, Andreas Bulling
in Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2018
######################################################################################################################################
"""
import os
import cv2
import numpy as np
import csv
import argparse
# import dlib
import glob
def normalize_woimg(landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
    """Compute the data-normalization transform without warping an image.

    Builds the rotation ``R`` that points a virtual camera at ``center`` and
    the perspective warp ``W`` that maps original image coordinates into the
    normalized view, then applies ``W`` to the 2D ``landmarks``.

    Args:
        landmarks: 2D points; flattened to (x, y) pairs before warping.
        focal_norm: focal length of the normalized (virtual) camera.
        distance_norm: target distance between ``center`` and the camera.
        roi_size: ``(width, height)`` of the normalized view.
        center: 3D reference point in camera coordinates (reshaped to 3x1).
        hr: head rotation vector, converted with ``cv2.Rodrigues``.
        ht: accepted for signature parity with ``normalize``; unused here.
        cam: 3x3 intrinsic matrix of the original camera.
        gc: optional 3D gaze target; when given, a unit gaze direction in
            the normalized space is returned.

    Returns:
        ``[None, R, hR_norm, gc_normalized, landmarks_warped, W]`` -- the
        leading ``None`` stands in for the warped image that ``normalize``
        returns in the same slot.
    """
    center = center.reshape(3, 1)
    head_rot = cv2.Rodrigues(hr)[0]  # rotation vector -> rotation matrix

    # Scale along z so that `center` ends up `distance_norm` away.
    eye_dist = np.linalg.norm(center)
    depth_scale = distance_norm / eye_dist

    roi_w, roi_h = roi_size[0], roi_size[1]
    virtual_cam = np.array([
        [focal_norm, 0, roi_w / 2],
        [0, focal_norm, roi_h / 2],
        [0, 0, 1.0],
    ])
    scaling = np.diag([1.0, 1.0, depth_scale])

    # Axes of the normalized camera: z looks at `center`, and the x axis is
    # kept perpendicular to z while following the head's x axis.
    z_axis = (center / eye_dist).reshape(3)
    y_axis = np.cross(z_axis, head_rot[:, 0])
    y_axis = y_axis / np.linalg.norm(y_axis)
    x_axis = np.cross(y_axis, z_axis)
    x_axis = x_axis / np.linalg.norm(x_axis)
    R = np.column_stack((x_axis, y_axis, z_axis)).T

    # Perspective warp from original pixels to normalized pixels.
    W = (virtual_cam @ scaling) @ (R @ np.linalg.inv(cam))

    # Head rotation expressed in the normalized space.
    hR_norm = R @ head_rot

    # Warp the 2D landmarks with the same transform.
    point_count = landmarks.shape[0]
    pts = landmarks.astype('float32').reshape(-1, 1, 2)
    landmarks_warped = cv2.perspectiveTransform(pts, W).reshape(point_count, 2)

    if gc is None:
        gc_normalized = None
    else:
        # Modified data normalization: only the rotation R is applied to
        # the gaze direction (no scaling), followed by re-normalization.
        gaze_vec = R @ (gc.reshape((3, 1)) - center)
        gc_normalized = gaze_vec / np.linalg.norm(gaze_vec)

    return [None, R, hR_norm, gc_normalized, landmarks_warped, W]
def normalize(img, landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
    """Warp an image, its landmarks and (optionally) gaze into normalized space.

    Builds the rotation ``R`` that points a virtual camera at ``center`` and
    the perspective warp ``W`` from original to normalized image
    coordinates, then applies ``W`` to both ``img`` and ``landmarks``.

    Args:
        img: input image passed to ``cv2.warpPerspective``.
        landmarks: 2D points; flattened to (x, y) pairs before warping.
        focal_norm: focal length of the normalized (virtual) camera.
        distance_norm: target distance between ``center`` and the camera.
        roi_size: ``(width, height)`` of the normalized image.
        center: 3D reference point in camera coordinates (reshaped to 3x1).
        hr: head rotation vector, converted with ``cv2.Rodrigues``.
        ht: head translation; not used inside this function.
        cam: 3x3 intrinsic matrix of the original camera.
        gc: optional 3D gaze target; when given, a unit gaze direction in
            the normalized space is returned.

    Returns:
        ``[img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]``.
    """
    center = center.reshape(3, 1)
    head_rot = cv2.Rodrigues(hr)[0]  # rotation vector -> rotation matrix

    # Scale along z so that `center` ends up `distance_norm` away.
    eye_dist = np.linalg.norm(center)
    depth_scale = distance_norm / eye_dist

    roi_w, roi_h = roi_size[0], roi_size[1]
    virtual_cam = np.array([
        [focal_norm, 0, roi_w / 2],
        [0, focal_norm, roi_h / 2],
        [0, 0, 1.0],
    ])
    scaling = np.diag([1.0, 1.0, depth_scale])

    # Axes of the normalized camera: z looks at `center`, and the x axis is
    # kept perpendicular to z while following the head's x axis.
    z_axis = (center / eye_dist).reshape(3)
    y_axis = np.cross(z_axis, head_rot[:, 0])
    y_axis = y_axis / np.linalg.norm(y_axis)
    x_axis = np.cross(y_axis, z_axis)
    x_axis = x_axis / np.linalg.norm(x_axis)
    R = np.column_stack((x_axis, y_axis, z_axis)).T

    # Perspective warp from original pixels to normalized pixels.
    W = (virtual_cam @ scaling) @ (R @ np.linalg.inv(cam))

    # Image normalization.
    img_warped = cv2.warpPerspective(img, W, roi_size)

    # Head rotation expressed in the normalized space.
    hR_norm = R @ head_rot

    # Warp the 2D landmarks with the same transform as the image.
    point_count = landmarks.shape[0]
    pts = landmarks.astype('float32').reshape(-1, 1, 2)
    landmarks_warped = cv2.perspectiveTransform(pts, W).reshape(point_count, 2)

    if gc is None:
        gc_normalized = None
    else:
        # Modified data normalization: only the rotation R is applied to
        # the gaze direction (no scaling), followed by re-normalization.
        gaze_vec = R @ (gc.reshape((3, 1)) - center)
        gc_normalized = gaze_vec / np.linalg.norm(gaze_vec)

    return [img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]
def normalize_face(img, face, hr, ht, cam, gc=None, landmarks=None):
    """Run data normalization for the full face.

    The face centre is the mean of two points: the mean of model columns
    0-3 (eye corners) and the mean of model columns 4-5 (called
    ``nose_center`` in this code).

    Args:
        img: input image to be warped.
        face: 3xN generic face model (landmarks stored as columns).
        hr: head rotation vector.
        ht: head translation vector (reshaped to 3x1).
        cam: 3x3 intrinsic matrix of the original camera.
        gc: optional 3D gaze target.
        landmarks: optional (N, 2) image landmarks to warp together with
            the image. When omitted, the posed 3D model points are
            projected through ``cam`` and used instead.

    Returns:
        The list returned by ``normalize``:
        ``[img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]``.
    """
    ## normalized camera parameters
    focal_norm = 960  # focal length of normalized camera
    distance_norm = 600  # normalized distance between face centre and camera
    roi_size = (224, 224)  # size of cropped face image

    ## compute estimated 3D positions of the landmarks
    ht = ht.reshape((3, 1))
    hR = cv2.Rodrigues(hr)[0]  # rotation matrix
    Fc = np.dot(hR, face) + ht  # 3D positions of facial landmarks

    two_eye_center = np.mean(Fc[:, 0:4], axis=1).reshape((3, 1))
    nose_center = np.mean(Fc[:, 4:6], axis=1).reshape((3, 1))
    # face centre = midpoint between the eye centre and the second centre
    face_center = np.mean(
        np.concatenate((two_eye_center, nose_center), axis=1), axis=1
    ).reshape((3, 1))

    # `normalize` requires 2D landmarks as its second argument; the previous
    # call omitted it, shifting every positional argument by one.
    if landmarks is None:
        # Fallback: pinhole projection of the posed model points.
        projected = np.dot(cam, Fc)
        landmarks = (projected[:2] / projected[2]).T

    return normalize(img, landmarks, focal_norm, distance_norm, roi_size,
                     face_center, hr, ht, cam, gc)
def normalize_eye(img, face, hr, ht, cam, gc=None, landmarks=None):
    """Run data normalization separately for each eye.

    Each eye centre is the midpoint of two model columns: columns 0 and 1
    for the first eye (``re``) and columns 2 and 3 for the second (``le``).

    Args:
        img: input image to be warped.
        face: 3xN generic face model (landmarks stored as columns).
        hr: head rotation vector.
        ht: head translation vector (reshaped to 3x1).
        cam: 3x3 intrinsic matrix of the original camera.
        gc: optional 3D gaze target.
        landmarks: optional (N, 2) image landmarks to warp together with
            the image. When omitted, the posed 3D model points are
            projected through ``cam`` and used instead.

    Returns:
        A two-element list ``[result_re, result_le]``; each element is the
        list returned by ``normalize`` for that eye.
    """
    ## normalized camera parameters
    focal_norm = 960  # focal length of normalized camera
    distance_norm = 600  # normalized distance between eye and camera
    roi_size = (60, 36)  # size of cropped eye image

    ## compute estimated 3D positions of the landmarks
    ht = ht.reshape((3, 1))
    hR = cv2.Rodrigues(hr)[0]  # rotation matrix
    Fc = np.dot(hR, face) + ht  # 3D positions of facial landmarks
    re = 0.5 * (Fc[:, 0] + Fc[:, 1]).reshape((3, 1))  # eye centre from columns 0-1
    le = 0.5 * (Fc[:, 2] + Fc[:, 3]).reshape((3, 1))  # eye centre from columns 2-3

    # `normalize` requires 2D landmarks as its second argument; the previous
    # calls omitted it, shifting every positional argument by one.
    if landmarks is None:
        # Fallback: pinhole projection of the posed model points.
        projected = np.dot(cam, Fc)
        landmarks = (projected[:2] / projected[2]).T

    ## normalize each eye
    return [
        normalize(img, landmarks, focal_norm, distance_norm, roi_size,
                  eye_center, hr, ht, cam, gc)
        for eye_center in (re, le)
    ]
def load_calibration(calib_path):
    """Load camera intrinsics and distortion coefficients from a file.

    The file is read with ``cv2.FileStorage`` and is expected to hold the
    nodes ``camera_matrix`` and ``dist_coeffs``, as obtained from the
    camera calibration functions in OpenCV.

    Args:
        calib_path: path to the calibration file.

    Returns:
        ``(camera_matrix, camera_distortion)`` as read from the two nodes.
    """
    fs = cv2.FileStorage(calib_path, cv2.FILE_STORAGE_READ)
    try:
        camera_matrix = fs.getNode('camera_matrix').mat()
        camera_distortion = fs.getNode('dist_coeffs').mat()
    finally:
        # Always close the underlying file handle, even if a read fails.
        fs.release()
    return camera_matrix, camera_distortion
def load_facemodel(model_path):
    """Load the generic 3D face model from a ``cv2.FileStorage`` file.

    The model includes 6 facial landmarks: four eye corners and two mouth
    corners. It is read from the node named ``face_model``.

    Args:
        model_path: path to the face model file.

    Returns:
        The matrix stored under ``face_model``.
    """
    fs = cv2.FileStorage(model_path, cv2.FILE_STORAGE_READ)
    try:
        face_model = fs.getNode('face_model').mat()
    finally:
        # Always close the underlying file handle, even if the read fails.
        fs.release()
    return face_model
def read_image(img_path, camera_matrix, camera_distortion):
    """Read an image from disk and return its undistorted version.

    Args:
        img_path: path handed to ``cv2.imread``.
        camera_matrix: camera intrinsics used for undistortion.
        camera_distortion: distortion coefficients used for undistortion.

    Returns:
        The result of ``cv2.undistort`` on the loaded image.
    """
    raw = cv2.imread(img_path)
    return cv2.undistort(raw, camera_matrix, camera_distortion)
def estimateHeadPose(landmarks, face_model, camera, distortion, iterate=True):
    """Estimate head pose from 2D landmarks and a 3D face model.

    An initial pose is computed with the EPnP solver. When ``iterate`` is
    true, ``cv2.solvePnP`` is run a second time using that pose as the
    extrinsic guess.

    Args:
        landmarks: 2D image points.
        face_model: 3D model points matching ``landmarks``.
        camera: camera intrinsic matrix.
        distortion: distortion coefficients.
        iterate: whether to run the refinement pass.

    Returns:
        ``(rvec, tvec)`` rotation and translation vectors.
    """
    _, rvec, tvec = cv2.solvePnP(
        face_model, landmarks, camera, distortion, flags=cv2.SOLVEPNP_EPNP
    )
    if not iterate:
        return rvec, tvec

    # Further optimize, starting from the EPnP solution.
    _, rvec, tvec = cv2.solvePnP(
        face_model, landmarks, camera, distortion, rvec, tvec,
        useExtrinsicGuess=True,
    )
    return rvec, tvec
def detect_landmark(img, detector_path, predictor_path):
    """Detect a face with dlib and return six facial keypoints.

    Args:
        img: image handed to the dlib CNN face detector.
        detector_path: path to the ``cnn_face_detection_model_v1`` weights.
        predictor_path: path to the ``shape_predictor`` model.

    Returns:
        A (6, 2) array of (x, y) coordinates taken from shape parts
        36, 39, 42, 45, 48 and 54 of the first detection, or ``None`` when
        no face is detected.
    """
    # Imported here because the module-level `import dlib` is commented out;
    # this keeps dlib optional for callers that never run detection.
    import dlib

    ## obtain facial landmarks using dlib
    detector = dlib.cnn_face_detection_model_v1(detector_path)
    dets = detector(img, 0)
    if len(dets) == 0:
        return None

    predictor = dlib.shape_predictor(predictor_path)
    shape = predictor(img, dets[0].rect)

    ## extract required keypoints
    keypoint_ids = (36, 39, 42, 45, 48, 54)
    landmarks = np.array([
        [shape.part(idx).x, shape.part(idx).y] for idx in keypoint_ids
    ])
    return landmarks
def read_landmark(img_path):
    """Read the six annotated 2D landmarks for an image from its person file.

    ``img_path`` is expected to end in ``<person>/<day>/<image file>``. The
    annotations are read from ``<person>/<person>.txt``, where each line
    starts with the key ``<day>/<image file>`` (joined with the platform
    separator) followed by whitespace-separated values. Of the values after
    the key, the ones at positions 2..13 are parsed as integers and returned
    as six (x, y) pairs.

    Args:
        img_path: path to the image, split on ``os.path.sep``.

    Returns:
        A (6, 2) integer array of landmark coordinates.

    Raises:
        KeyError: if the annotation file has no entry for this image.
    """
    parts = img_path.split(os.path.sep)
    img_file, day, person = parts[-1], parts[-2], parts[-3]
    person_path = os.path.dirname(os.path.dirname(img_path))
    person_txt = os.path.join(person_path, person + '.txt')
    index = os.path.join(day, img_file)
    print(person_txt)
    print(index)

    annotations = {}
    with open(person_txt) as f:
        for row in csv.reader(f.readlines()):
            # Blank or whitespace-only lines carry no annotation; skip them
            # instead of failing with IndexError.
            if not row:
                continue
            words = row[0].split()
            if not words:
                continue
            annotations[words[0]] = words[1:]

    landmarks = np.array([int(i) for i in annotations[index][2:14]]).reshape((6, 2))
    return landmarks
# def process_image(img_path, detector_path, predictor_path, camera_matrix, camera_distortion, face_model, gc=None):
# # read input image
# img = read_image(img_path, camera_matrix, camera_distortion)
# # detect facial landmarks
# landmarks = detect_landmark(img, detector_path, predictor_path)
# if landmarks is not None:
# # estimate head pose
# hr, ht = estimateHeadPose(face_model, landmarks, camera_matrix, camera_distortion)
# # data normalization for left and right eye image
# normalized_eyes = normalize_eye(img, face_model, hr, ht, camera_matrix, gc)
# # data normalization for full face
# normalized_face = normalize_face(img, face_model, hr, ht, camera_matrix, gc)
# # return a list of [reye, leye, face]
# return normalized_eyes + [normalized_face]