# File size: 9,867 Bytes
# 78dd6e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# -*- coding: utf-8 -*-
"""
######################################################################################################################################
This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy of this license,
visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

Any publications arising from the use of this software, including but
not limited to academic journal and conference publications, technical
reports and manuals, must cite at least one of the following works:

Revisiting Data Normalization for Appearance-Based Gaze Estimation
Xucong Zhang, Yusuke Sugano, Andreas Bulling
in Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2018
######################################################################################################################################
"""

import os
import cv2
import numpy as np
import csv
import argparse
# import dlib
import glob


	

	
def normalize_woimg(landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
	"""Compute the data-normalization transform for one reference point without warping an image.

	Args:
		landmarks: 2D points to warp; reshaped to (-1, 1, 2) and returned as (landmarks.shape[0], 2).
		focal_norm: focal length of the normalized camera.
		distance_norm: distance at which the normalized camera is placed from ``center``.
		roi_size: (width, height) pair; half of each entry becomes the principal point.
		center: 3D reference point in camera coordinates, reshaped to (3, 1).
		hr: head rotation vector, converted to a matrix with ``cv2.Rodrigues``.
		ht: accepted for signature compatibility; not read by this function.
		cam: 3x3 camera matrix of the original camera (it is inverted here).
		gc: optional 3D gaze target; when given, the unit gaze direction is returned.

	Returns:
		``[None, R, hR_norm, gc_normalized, landmarks_warped, W]`` — the leading ``None``
		stands in for the warped image, and ``gc_normalized`` is ``None`` when ``gc`` is ``None``.
	"""
	center = center.reshape(3,1)
	head_rot = cv2.Rodrigues(hr)[0] # rotation vector -> rotation matrix

	## ---------- virtual camera ----------
	eye_dist = np.linalg.norm(center) # actual distance between reference point and original camera
	depth_scale = distance_norm/eye_dist

	norm_intrinsics = np.array([
		[focal_norm, 0, roi_size[0]/2],
		[0, focal_norm, roi_size[1]/2],
		[0, 0, 1.0],
	])
	scaling = np.diag([1.0, 1.0, depth_scale]) # scales only along the viewing axis

	## ---------- axes of the normalized camera ----------
	z_axis = (center/eye_dist).reshape(3) # points from the camera towards the reference point
	y_axis = np.cross(z_axis, head_rot[:,0]) # perpendicular to the view axis and the head's x-axis
	y_axis = y_axis/np.linalg.norm(y_axis)
	x_axis = np.cross(y_axis, z_axis)
	x_axis = x_axis/np.linalg.norm(x_axis)
	R = np.column_stack((x_axis, y_axis, z_axis)).T # rows are the new x, y, z axes

	inv_cam = np.linalg.inv(cam)
	W = norm_intrinsics.dot(scaling).dot(R.dot(inv_cam)) # pixel-to-pixel perspective transform

	## ---------- head rotation in normalized space ----------
	hR_norm = R.dot(head_rot)

	## ---------- landmarks in normalized image ----------
	num_point = landmarks.shape[0]
	pts = landmarks.reshape(-1,1,2).astype('float32')
	landmarks_warped = cv2.perspectiveTransform(pts, W).reshape(num_point, 2)

	## ---------- gaze direction in normalized space ----------
	gc_normalized = None
	if gc is not None:
		# Modified data normalization: only the rotation R is applied to the gaze vector.
		# The original formulation would use M = S*R here instead (presumably
		# np.dot(M, gaze) — the earlier in-code note was ambiguous on this point).
		gaze = R.dot(gc.reshape((3,1)) - center)
		gc_normalized = gaze/np.linalg.norm(gaze)

	return [None, R, hR_norm, gc_normalized, landmarks_warped, W]

	
def normalize(img, landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
	"""Normalize an image, its landmarks, the head rotation and (optionally) the gaze vector.

	The geometry is identical to ``normalize_woimg``; this function delegates to it so the
	two cannot drift apart, and additionally warps ``img`` with the resulting transform.

	Args:
		img: input image passed to ``cv2.warpPerspective``.
		landmarks: 2D points to warp into the normalized image.
		focal_norm: focal length of the normalized camera.
		distance_norm: distance of the normalized camera from ``center``.
		roi_size: (width, height) of the normalized image.
		center: 3D reference point in camera coordinates.
		hr: head rotation vector.
		ht: head translation; forwarded unchanged (not read by the geometry code).
		cam: 3x3 camera matrix of the original camera.
		gc: optional 3D gaze target.

	Returns:
		``[img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]``.
	"""
	_, R, hR_norm, gc_normalized, landmarks_warped, W = normalize_woimg(
		landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc
	)

	## ---------- normalize image ----------
	img_warped = cv2.warpPerspective(img, W, roi_size)

	return [img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]

def normalize_face(img, face, hr, ht, cam, gc=None, landmarks=None):
	"""Normalize a full-face image around the face center.

	Args:
		img: input image, forwarded to ``normalize``.
		face: 3D face model; used as ``np.dot(hR, face)``, so it must be 3xN. Columns 0-3
			and 4-5 are averaged to build the face center.
		hr: head rotation vector.
		ht: head translation; reshaped to (3, 1).
		cam: 3x3 camera matrix.
		gc: optional 3D gaze target.
		landmarks: optional (N, 2) pixel landmarks to warp along with the image. When
			omitted, the posed model points are reprojected with ``cam`` and used instead.

	Returns:
		The list produced by ``normalize``:
		``[img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]``.
	"""
	## normalized camera parameters
	focal_norm = 960 # focal length of normalized camera
	distance_norm = 600 # normalized distance between face center and camera
	roi_size = (224, 224) # size of cropped face image

	## compute estimated 3D positions of the landmarks
	ht = ht.reshape((3,1))
	hR = cv2.Rodrigues(hr)[0] # rotation matrix
	Fc = np.dot(hR, face) + ht # 3D positions of facial landmarks

	two_eye_center = np.mean(Fc[:, 0:4], axis=1).reshape((3, 1))
	# NOTE(review): named "nose" here, but load_facemodel describes landmarks 4-5 as
	# mouth corners — confirm against the face model file in use.
	nose_center = np.mean(Fc[:, 4:6], axis=1).reshape((3, 1))
	# face center = midpoint between the eye center and the landmark-4/5 center
	face_center = np.mean(np.concatenate((two_eye_center, nose_center), axis=1), axis=1).reshape((3, 1))

	if landmarks is None:
		# No detected 2D landmarks supplied: reproject the posed model points into the image
		projected = np.dot(cam, Fc)
		landmarks = (projected[:2] / projected[2]).T

	# normalize() takes the landmarks as its second argument; it was previously omitted,
	# which shifted every following argument by one position.
	return normalize(img, landmarks, focal_norm, distance_norm, roi_size, face_center, hr, ht, cam, gc)

def normalize_eye(img, face, hr, ht, cam, gc=None, landmarks=None):
	"""Normalize the two eye regions of an image.

	Args:
		img: input image, forwarded to ``normalize``.
		face: 3D face model; used as ``np.dot(hR, face)``, so it must be 3xN. Columns 0-1
			and 2-3 are taken as the corner pairs of the two eyes.
		hr: head rotation vector.
		ht: head translation; reshaped to (3, 1).
		cam: 3x3 camera matrix.
		gc: optional 3D gaze target.
		landmarks: optional (N, 2) pixel landmarks to warp along with each eye image. When
			omitted, the posed model points are reprojected with ``cam`` and used instead.

	Returns:
		A two-element list: the ``normalize`` result centered on ``re`` (columns 0-1)
		followed by the result centered on ``le`` (columns 2-3).
	"""
	## normalized camera parameters
	focal_norm = 960 # focal length of normalized camera
	distance_norm = 600 # normalized distance between eye and camera
	roi_size = (60, 36) # size of cropped eye image

	## compute estimated 3D positions of the landmarks
	ht = ht.reshape((3,1))
	hR = cv2.Rodrigues(hr)[0] # rotation matrix
	Fc = np.dot(hR, face) + ht # 3D positions of facial landmarks
	# NOTE(review): the names suggest re = right eye and le = left eye (the earlier
	# comments said the opposite) — confirm against the face model's landmark order.
	re = 0.5*(Fc[:,0] + Fc[:,1]).reshape((3,1)) # midpoint of landmarks 0 and 1
	le = 0.5*(Fc[:,2] + Fc[:,3]).reshape((3,1)) # midpoint of landmarks 2 and 3

	if landmarks is None:
		# No detected 2D landmarks supplied: reproject the posed model points into the image
		projected = np.dot(cam, Fc)
		landmarks = (projected[:2] / projected[2]).T

	## normalize each eye
	# normalize() takes the landmarks as its second argument; it was previously omitted,
	# which shifted every following argument by one position.
	data = [
		normalize(img, landmarks, focal_norm, distance_norm, roi_size, eye_center, hr, ht, cam, gc)
		for eye_center in (re, le)
	]
	return data

def load_calibration(calib_path):
	"""Load the camera calibration from an OpenCV FileStorage file.

	Reads the ``camera_matrix`` and ``dist_coeffs`` nodes; these parameters are expected
	to have been obtained with the camera calibration functions in OpenCV.

	Args:
		calib_path: path of the calibration file.

	Returns:
		Tuple ``(camera_matrix, camera_distortion)`` as returned by the nodes' ``mat()``.
	"""
	fs = cv2.FileStorage(calib_path, cv2.FILE_STORAGE_READ)
	try:
		camera_matrix = fs.getNode('camera_matrix').mat()
		camera_distortion = fs.getNode('dist_coeffs').mat()
	finally:
		# close the underlying file handle even if reading a node fails
		fs.release()
	return camera_matrix, camera_distortion

def load_facemodel(model_path):
	"""Load the generic 3D face model from an OpenCV FileStorage file.

	The model includes 6 facial landmarks: four eye corners and two mouth corners.

	Args:
		model_path: path of the face model file.

	Returns:
		The content of the ``face_model`` node as returned by ``mat()``.
	"""
	fs = cv2.FileStorage(model_path, cv2.FILE_STORAGE_READ)
	try:
		face_model = fs.getNode('face_model').mat()
	finally:
		# close the underlying file handle even if reading the node fails
		fs.release()
	return face_model

def read_image(img_path, camera_matrix, camera_distortion):
	"""Read an image from disk and return it with lens distortion removed.

	The file at ``img_path`` is loaded with ``cv2.imread`` and passed through
	``cv2.undistort`` using the given camera matrix and distortion coefficients.
	"""
	raw = cv2.imread(img_path)
	# NOTE(review): the imread result is not checked before undistorting — confirm
	# that callers only pass paths of readable images.
	return cv2.undistort(raw, camera_matrix, camera_distortion)

def estimateHeadPose(landmarks, face_model, camera, distortion, iterate=True):
	"""Estimate the head pose from 2D landmarks and the 3D face model.

	Args:
		landmarks: 2D image points (passed to ``cv2.solvePnP`` as image points).
		face_model: 3D model points (passed to ``cv2.solvePnP`` as object points).
		camera: camera matrix.
		distortion: distortion coefficients.
		iterate: when true, refine the EPnP estimate with a second ``solvePnP`` call
			seeded with it (``useExtrinsicGuess=True``, ``flags`` left at its default).

	Returns:
		Tuple ``(rvec, tvec)``: rotation vector and translation vector.
	"""
	## initial estimate with EPnP
	_, rvec, tvec = cv2.solvePnP(face_model, landmarks, camera, distortion, flags=cv2.SOLVEPNP_EPNP)
	if not iterate:
		return rvec, tvec

	## further optimize, starting from the initial estimate
	_, rvec, tvec = cv2.solvePnP(
		face_model, landmarks, camera, distortion,
		rvec=rvec, tvec=tvec, useExtrinsicGuess=True,
	)
	return rvec, tvec

def detect_landmark(img, detector_path, predictor_path):
	"""Detect a face with dlib and return six facial keypoints.

	Args:
		img: image handed to the dlib detector and shape predictor.
		detector_path: model file for ``dlib.cnn_face_detection_model_v1``.
		predictor_path: model file for ``dlib.shape_predictor``.

	Returns:
		A (6, 2) array of ``[x, y]`` pixel positions for shape parts 36, 39, 42, 45, 48
		and 54 of the first detection, or ``None`` when no face is detected.

	Raises:
		ImportError: if dlib is not installed.
	"""
	# The module-level "import dlib" is commented out, so the name was undefined here.
	# Importing lazily keeps dlib optional for users who never call this function.
	import dlib

	## obtain facial landmarks using dlib
	detector = dlib.cnn_face_detection_model_v1(detector_path)
	dets = detector(img, 0)

	if len(dets) == 0:
		return None

	predictor = dlib.shape_predictor(predictor_path)
	shape = predictor(img, dets[0].rect) # only the first detection is used

	## extract required keypoints
	keypoint_ids = (36, 39, 42, 45, 48, 54)
	landmarks = np.array([[shape.part(i).x, shape.part(i).y] for i in keypoint_ids])

	return landmarks


def read_landmark(img_path):
	"""Look up the six annotated landmarks of an image in its person-level text file.

	``img_path`` is expected to end in ``<person>/<day>/<image file>``. The annotation file
	is ``<person>/<person>.txt``; each line is whitespace-separated, starting with the key
	``<day>/<image file>``. Fields 2..13 after the key are read as the 12 landmark integers.

	Args:
		img_path: path of the image, using ``os.path.sep`` as separator.

	Returns:
		A (6, 2) integer array of landmark coordinates.

	Raises:
		KeyError: if the image has no entry in the annotation file.
	"""
	parts = img_path.split(os.path.sep)
	img_file = parts[-1]
	day = parts[-2]
	person = parts[-3]
	person_path = os.path.dirname(os.path.dirname(img_path))

	person_txt = os.path.join(person_path, person+'.txt')
	index = os.path.join(day,img_file)
	# NOTE(review): console output kept as-is so existing callers see the same prints
	print(person_txt)
	print(index)

	annotations = {}
	with open(person_txt) as f:
		for row in csv.reader(f):
			# blank or whitespace-only lines (e.g. a trailing newline) carry no entry
			if not row or not row[0].strip():
				continue
			words = row[0].split()
			annotations[words[0]] = words[1:]

	if index not in annotations:
		# On platforms whose separator is not '/', the joined key may not match a file
		# written with forward slashes; try that spelling before giving up.
		portable_index = day + '/' + img_file
		if portable_index in annotations:
			index = portable_index
		else:
			raise KeyError('no landmark entry for %r in %s' % (index, person_txt))

	landmarks = np.array([int(value) for value in annotations[index][2:14]]).reshape((6,2))
	return landmarks

# def process_image(img_path, detector_path, predictor_path, camera_matrix, camera_distortion, face_model, gc=None):
#     # read input image
#     img = read_image(img_path, camera_matrix, camera_distortion)

#     # detect facial landmarks
#     landmarks = detect_landmark(img, detector_path, predictor_path)

#     if landmarks is not None:
#         # estimate head pose
#         hr, ht = estimateHeadPose(landmarks, face_model, camera_matrix, camera_distortion)

#         # data normalization for left and right eye image
#         normalized_eyes = normalize_eye(img, face_model, hr, ht, camera_matrix, gc)

#         # data normalization for full face
#         normalized_face = normalize_face(img, face_model, hr, ht, camera_matrix, gc)

#         # return a list of [reye, leye, face]
#         return normalized_eyes + [normalized_face]