saptak21 committed on Apr 22

Commit

bcebb15

verified ·

1 Parent(s): d595be7

Upload 73 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +2 -0
data/face_model.txt +50 -0
datasets/__init__.py +0 -0
datasets/eyediap.py +103 -0
datasets/gaze360.py +106 -0
datasets/gazecapture.py +132 -0
datasets/helper/image_transform.py +81 -0
datasets/mpiigaze.py +109 -0
datasets/xgaze.py +137 -0
examples/De_Nachtwacht.png +3 -0
examples/The_Night_Watch_Frans_Banninck_Cocq.png +3 -0
gazelib/__init__.py +1 -0
gazelib/draw/__init__.py +0 -0
gazelib/draw/draw_image.py +69 -0
gazelib/gaze/__init__.py +1 -0
gazelib/gaze/gaze_utils.py +166 -0
gazelib/gaze/normalize.py +266 -0
gazelib/label_transform.py +195 -0
gazelib/utils/__init__.py +4 -0
gazelib/utils/color_text.py +85 -0
gazelib/utils/h5_utils.py +53 -0
models/hybrid_tr.py +570 -0
models/resnet.py +366 -0
models/vit/mae.py +429 -0
models/vit/mae_gaze.py +69 -0
models/vit/vit_gaze.py +103 -0
unigaze/__init__.py +0 -0
unigaze/configs/config.yaml +38 -0
unigaze/configs/data/eyediap_cs.yaml +22 -0
unigaze/configs/data/eyediap_cs_test.yaml +22 -0
unigaze/configs/data/eyediap_cs_train.yaml +22 -0
unigaze/configs/data/eyediap_ft.yaml +24 -0
unigaze/configs/data/eyediap_ft_test.yaml +24 -0
unigaze/configs/data/eyediap_ft_train.yaml +24 -0
unigaze/configs/data/gaze360_test.yaml +74 -0
unigaze/configs/data/gaze360_train.yaml +252 -0
unigaze/configs/data/gazecapture_test.yaml +15 -0
unigaze/configs/data/gazecapture_test_ds15.yaml +16 -0
unigaze/configs/data/gazecapture_train.yaml +1189 -0
unigaze/configs/data/gazecapture_train_ds15.yaml +1189 -0
unigaze/configs/data/mpiigaze.yaml +24 -0
unigaze/configs/data/mpiigaze_test.yaml +24 -0
unigaze/configs/data/mpiigaze_train.yaml +24 -0
unigaze/configs/data/xgaze_0_60sub.yaml +76 -0
unigaze/configs/data/xgaze_0_60sub_d3.yaml +80 -0
unigaze/configs/data/xgaze_0_80sub.yaml +97 -0
unigaze/configs/data/xgaze_0_80sub_d3.yaml +97 -0
unigaze/configs/data/xgaze_60_80sub.yaml +31 -0
unigaze/configs/exp/blank.yaml +22 -0
unigaze/configs/exp/cross/train_ED.yaml +27 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/De_Nachtwacht.png filter=lfs diff=lfs merge=lfs -text
+examples/The_Night_Watch_Frans_Banninck_Cocq.png filter=lfs diff=lfs merge=lfs -text

data/face_model.txt ADDED Viewed

	@@ -0,0 +1,50 @@

+5.862468481063842773e-01 7.872964477539062500e+01 2.317634201049804688e+01
+-5.711375045776367188e+01 -5.130039978027343750e+01 4.678271484375000000e+01
+-5.021675109863281250e+01 -5.602691268920898438e+01 3.416214370727539062e+01
+-3.879566955566406250e+01 -5.690497207641601562e+01 2.192905616760253906e+01
+-2.962696456909179688e+01 -5.768646621704101562e+01 1.585745716094970703e+01
+-1.556392288208007812e+01 -5.381772232055664062e+01 1.200321197509765625e+01
+1.493891811370849609e+01 -5.252636718750000000e+01 1.241601753234863281e+01
+2.762125968933105469e+01 -5.633798599243164062e+01 1.620070838928222656e+01
+3.687218856811523438e+01 -5.588240051269531250e+01 2.234012985229492188e+01
+4.801872634887695312e+01 -5.413969039916992188e+01 3.287670516967773438e+01
+5.493420410156250000e+01 -4.876091766357421875e+01 4.391139984130859375e+01
+9.755885004997253418e-01 -3.599571609497070312e+01 1.533371734619140625e+01
+1.295488834381103516e+00 -1.837105178833007812e+01 1.295253086090087891e+01
+1.169039964675903320e+00 -5.502729415893554688e+00 6.933759689331054688e+00
+1.324353933334350586e+00 5.223155975341796875e+00 3.281763553619384766e+00
+-1.061166477203369141e+01 1.295834922790527344e+01 2.162276458740234375e+01
+-5.147602558135986328e+00 1.608338356018066406e+01 1.863278388977050781e+01
+7.948544025421142578e-01 1.780137062072753906e+01 1.740065383911132812e+01
+6.404633045196533203e+00 1.649684906005859375e+01 1.887524223327636719e+01
+1.128962993621826172e+01 1.386424446105957031e+01 2.183790016174316406e+01
+-4.650949859619140625e+01 -3.832709503173828125e+01 3.641600418090820312e+01
+-3.662562179565429688e+01 -4.003409194946289062e+01 2.697853851318359375e+01
+-2.613725852966308594e+01 -4.035707473754882812e+01 2.568147850036621094e+01
+-1.776072120666503906e+01 -3.258519744873046875e+01 2.907615661621093750e+01
+-2.857307624816894531e+01 -3.133931159973144531e+01 2.851314163208007812e+01
+-3.531597518920898438e+01 -3.336409759521484375e+01 2.953546142578125000e+01
+1.804391098022460938e+01 -3.095682334899902344e+01 2.906296730041503906e+01
+2.545973777770996094e+01 -3.785017395019531250e+01 2.660374259948730469e+01
+3.494161224365234375e+01 -3.641166687011718750e+01 2.815935897827148438e+01
+4.473758697509765625e+01 -3.410787200927734375e+01 3.673243713378906250e+01
+3.460580825805664062e+01 -2.936051368713378906e+01 3.002419853210449219e+01
+2.828340530395507812e+01 -2.810362434387207031e+01 2.857681274414062500e+01
+-2.000109672546386719e+01 3.587311935424804688e+01 2.467940139770507812e+01
+-1.517112541198730469e+01 3.055978584289550781e+01 2.077887535095214844e+01
+-4.272953987121582031e+00 2.849174499511718750e+01 1.563890647888183594e+01
+9.129478931427001953e-01 2.940682983398437500e+01 1.530903434753417969e+01
+5.915512084960937500e+00 2.886590385437011719e+01 1.577433967590332031e+01
+1.609077262878417969e+01 3.112099075317382812e+01 2.045352745056152344e+01
+2.146691894531250000e+01 3.712250137329101562e+01 2.439267730712890625e+01
+1.636684226989746094e+01 4.110508346557617188e+01 2.019831085205078125e+01
+8.093836784362792969e+00 4.461882400512695312e+01 1.674007606506347656e+01
+6.376140713691711426e-01 4.504141998291015625e+01 1.691001510620117188e+01
+-6.237400531768798828e+00 4.375403594970703125e+01 1.704776954650878906e+01
+-1.439151859283447266e+01 4.025728225708007812e+01 2.136468315124511719e+01
+-9.422926902770996094e+00 3.452179336547851562e+01 2.028252601623535156e+01
+1.115690827369689941e+00 3.555863952636718750e+01 1.827753639221191406e+01
+1.108111095428466797e+01 3.538360214233398438e+01 2.027869033813476562e+01
+1.114828586578369141e+01 3.651076889038085938e+01 2.039755630493164062e+01
+9.804738759994506836e-01 3.681156921386718750e+01 1.785094261169433594e+01
+-9.598259925842285156e+00 3.567073822021484375e+01 2.036244964599609375e+01

datasets/__init__.py ADDED Viewed

File without changes

datasets/eyediap.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import os
+import numpy as np
+import h5py
+import cv2
+from torch.utils.data import Dataset
+from typing import List
+from omegaconf import OmegaConf, listconfig
+from .helper.image_transform import wrap_transforms
+class EYEDIAPDataset(Dataset):
+	def __init__(self,
+				dataset_path: str,
+				color_type,
+				keys_to_use: List[str] = None,
+				data_name=None,
+				image_size:int=224,  ## <---
+				transform_type='basic_imagenet', ## <--- modified
+				image_key='face_patch',
+				gaze_key='face_gaze',
+				):
+		self.path = dataset_path
+		self.hdfs = {}
+		self.data_name = data_name
+		self.image_key = image_key
+		self.gaze_key = gaze_key
+		self.image_size = (image_size, image_size)
+		assert color_type in ['rgb', 'bgr']
+		self.color_type = color_type
+		self.selected_keys = [k for k in keys_to_use]
+		assert len(self.selected_keys) > 0
+		self.file_paths = [os.path.join(self.path, k) for k in self.selected_keys]
+		for num_i in range(0, len(self.selected_keys)):
+			file_path = os.path.join(self.path, self.selected_keys[num_i]) # the subdirectories: train, test are not used in MPIIFaceGaze and MPII_Rotate
+			self.hdfs[num_i] = h5py.File(file_path, 'r', swmr=True)
+			print('read file: ', os.path.join(self.path, self.selected_keys[num_i]))
+			assert self.hdfs[num_i].swmr_mode
+		self.build_idx_to_kv()
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+		self.transform = wrap_transforms(transform_type, image_size=image_size)
+		self.__hdfs = None
+		self.hdf = None
+	def __len__(self):
+		return len(self.idx_to_kv)
+	def __del__(self):
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+	def build_idx_to_kv(self):
+		self.idx_to_kv = []
+		self.key_idx_dict = {}
+		for num_i in range(0, len(self.selected_keys)):
+			this_sub = self.selected_keys[num_i].split('.')[0]
+			n = self.hdfs[num_i][self.image_key].shape[0]
+			self.idx_to_kv += [(num_i, i) for i in range(n)]
+			self.key_idx_dict[this_sub] = [ i for i in range(n)]
+	@property
+	def archives(self):
+		if self.__hdfs is None: # lazy loading here!
+			self.__hdfs = [h5py.File(h5_path, "r", swmr=True) for h5_path in self.file_paths]
+		return self.__hdfs
+	def preprocess_image(self, image):
+		image = image.astype(np.float32)
+		if self.color_type == 'bgr':
+			image = image[..., ::-1]
+		image = cv2.resize(image, self.image_size, interpolation=cv2.INTER_AREA)
+		image = self.transform(image.astype(np.uint8)		)
+		return image
+	def __getitem__(self, index):
+		key, idx = self.idx_to_kv[index]
+		self.hdf = self.archives[key]
+		assert self.hdf.swmr_mode
+		image = self.hdf[self.image_key][idx, :]
+		gaze_label = self.hdf[self.gaze_key][idx].astype('float') if self.gaze_key in self.hdf else np.array([0,0]).astype('float')
+		head_label = self.hdf['face_head_pose'][idx].astype('float') if 'face_head_pose' in self.hdf else np.array([0,0]).astype('float')
+		entry = {
+			'image': self.preprocess_image(image),
+			'gaze': gaze_label,
+			'head': head_label,
+			'key': key,
+			'index':index
+		}
+		return entry

datasets/gaze360.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import os
+import numpy as np
+import h5py, cv2
+from torch.utils.data import Dataset
+from typing import List
+from .helper.image_transform import wrap_transforms
+class Gaze360Dataset(Dataset):
+	def __init__(self,
+				dataset_path: str,
+				color_type,
+				keys_to_use: List[str] = None,
+				data_name=None,
+				image_size:int=224,
+				transform_type='basic_imagenet',
+				image_key='face_patch',
+				gaze_key='face_gaze',
+				sample_rate_use=1,
+				):
+		super().__init__()
+		self.dataset_path = dataset_path
+		self.hdfs = {}
+		self.data_name = data_name
+		self.image_key = image_key
+		self.gaze_key = gaze_key
+		self.image_size = (image_size, image_size)
+		assert color_type in ['rgb', 'bgr']
+		self.color_type = color_type
+		self.transform = wrap_transforms(transform_type, image_size=image_size)
+		self.sample_rate_use = sample_rate_use
+		#### -------------------------------------------------------- read the h5 files -------------------------------------------------------
+		self.selected_keys = [k for k in keys_to_use]
+		assert len(self.selected_keys) > 0
+		self.file_paths = [os.path.join(self.dataset_path, k) for k in self.selected_keys]
+		for num_i in range(0, len(self.selected_keys)):
+			file_path = os.path.join(self.dataset_path, self.selected_keys[num_i]) # the subdirectories: train, test are not used in MPIIFaceGaze and MPII_Rotate
+			self.hdfs[num_i] = h5py.File(file_path, 'r', swmr=True)
+			print('read file: ', os.path.join(self.dataset_path, self.selected_keys[num_i]))
+			assert self.hdfs[num_i].swmr_mode
+		####-----------------------------------------------------------------------------------------------------------------------------------
+		self.build_idx_to_kv()
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+		self.__hdfs = None
+		self.hdf = None
+	def build_idx_to_kv(self):
+		self.idx_to_kv = []
+		self.key_idx_dict = {}
+		for num_i in range(0, len(self.selected_keys)):
+			p_key = self.selected_keys[num_i].split('.')[0]  ##p00
+			n = self.hdfs[num_i][self.image_key].shape[0]
+			if self.sample_rate_use > 1:
+				indices = np.arange(0, n, self.sample_rate_use)
+			else:
+				indices = np.arange(0, n)
+			self.idx_to_kv += [(num_i, i) for i in indices]
+			self.key_idx_dict[p_key] = [i for i in indices]
+	def __len__(self):
+		return len(self.idx_to_kv)
+	def __del__(self):
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+	@property
+	def archives(self):
+		if self.__hdfs is None: # lazy loading here!
+			self.__hdfs = [h5py.File(h5_path, "r", swmr=True) for h5_path in self.file_paths]
+		return self.__hdfs
+	def preprocess_image(self, image):
+		image = image.astype(np.float32)
+		if self.color_type == 'bgr':
+			image = image[..., ::-1]
+		if image.shape[0] != self.image_size[0] or image.shape[1] != self.image_size[1]:
+			image = cv2.resize(image, self.image_size, interpolation=cv2.INTER_AREA)
+		image = self.transform(image.astype(np.uint8)		)
+		return image
+	def __getitem__(self, index):
+		key, idx = self.idx_to_kv[index]
+		self.hdf = self.archives[key]
+		image = self.hdf[self.image_key][idx]
+		gaze_label = self.hdf[self.gaze_key][idx].astype('float') if self.gaze_key in self.hdf else np.array([0,0]).astype('float')
+		head_label = self.hdf['face_head_pose'][idx].astype('float') if 'face_head_pose' in self.hdf else np.array([0,0]).astype('float')
+		entry = {
+			'image': self.preprocess_image(image),
+			'gaze': gaze_label,
+			'head': head_label,
+			'key': idx,
+			'index':index
+		}
+		return entry

datasets/gazecapture.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import os
+import numpy as np
+import h5py
+import cv2
+from torch.utils.data import Dataset
+from typing import List
+from omegaconf import OmegaConf, listconfig
+from .helper.image_transform import wrap_transforms
+class GazeCaptureDataset(Dataset):
+	def __init__(self,
+				dataset_path: str,
+				color_type,
+				keys_to_use: List[str] = None,
+				data_name=None,
+				image_size:int=224,  ## <---
+				transform_type='basic_imagenet', ## <--- modified
+				image_key='face_patch',
+				gaze_key='face_gaze',
+				sample_rate_use=1,
+				):
+		self.transform = wrap_transforms(transform_type, image_size=image_size)
+		self.path = dataset_path
+		self.hdfs = {}
+		self.data_name = data_name
+		self.image_key = image_key
+		self.gaze_key = gaze_key
+		self.image_size = (image_size, image_size)
+		self.sample_rate_use = sample_rate_use
+		assert color_type in ['rgb', 'bgr']
+		self.color_type = color_type
+		self.selected_keys = [ k for k in keys_to_use]
+		assert len(self.selected_keys) > 0
+		self.file_paths = [os.path.join(self.path, k) for k in self.selected_keys]
+		for num_i in range(0, len(self.selected_keys)):
+			file_path = os.path.join(self.path, self.selected_keys[num_i]) # the subdirectories: train, test are not used in MPIIFaceGaze and MPII_Rotate
+			self.hdfs[num_i] = h5py.File(file_path, 'r', swmr=True)
+			print('read file: ', os.path.join(self.path, self.selected_keys[num_i]))
+			assert self.hdfs[num_i].swmr_mode
+		self.build_idx_to_kv()
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+		self.__hdfs = None
+		self.hdf = None
+	def __len__(self):
+		return len(self.idx_to_kv)
+	def __del__(self):
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+	def build_idx_to_kv(self):
+		self.idx_to_kv = []
+		self.key_idx_dict = {}
+		for num_i in range(0, len(self.selected_keys)):
+			this_sub = self.selected_keys[num_i].split('.')[0]
+			n = self.hdfs[num_i][self.image_key].shape[0]
+			if self.sample_rate_use > 1:
+				indices = np.arange(0, n, self.sample_rate_use)
+			else:
+				indices = np.arange(0, n)
+			self.idx_to_kv += [(num_i, i) for i in indices ]
+			self.key_idx_dict[this_sub] = [ i for i in indices ]
+	@property
+	def archives(self):
+		if self.__hdfs is None: # lazy loading here!
+			self.__hdfs = [h5py.File(h5_path, "r", swmr=True) for h5_path in self.file_paths]
+		return self.__hdfs
+	def preprocess_image(self, image):
+		image = image.astype(np.float32)
+		if self.color_type == 'bgr':
+			image = image[..., ::-1]
+		image = cv2.resize(image, self.image_size, interpolation=cv2.INTER_AREA)
+		image = self.transform(image.astype(np.uint8)		)
+		return image
+	def __getitem__(self, index):
+		key, idx = self.idx_to_kv[index]
+		self.hdf = self.archives[key]
+		# self.hdf = h5py.File(os.path.join(self.path, self.selected_keys[key]), 'r', swmr=True)
+		assert self.hdf.swmr_mode
+		image = self.hdf[self.image_key][idx, :]
+		gaze_label = self.hdf[self.gaze_key][idx].astype('float') if self.gaze_key in self.hdf else np.array([0,0]).astype('float')
+		head_label = self.hdf['face_head_pose'][idx].astype('float') if 'face_head_pose' in self.hdf else np.array([0,0]).astype('float')
+		entry = {
+			'image': self.preprocess_image(image),
+			'gaze': gaze_label,
+			'head': head_label,
+			'key': key,
+			'index':index
+		}
+		return entry
+# class GazeCaptureDatasetSubset(GazeCaptureDataset):
+# 	def __init__(self, images_per_person=None, **kwargs):
+# 		self.images_per_person = images_per_person
+# 		super().__init__(**kwargs)
+# 	def build_idx_to_kv(self):
+# 		self.idx_to_kv = []
+# 		self.key_idx_dict = {}
+# 		for num_i in range(0, len(self.selected_keys)):
+# 			this_sub = self.selected_keys[num_i].split('.')[0]
+# 			n = self.hdfs[num_i][self.image_key].shape[0]
+# 			if self.images_per_person is not None:
+# 				n = min(n, self.images_per_person)
+# 			self.idx_to_kv += [(num_i, i) for i in range(n)]
+# 			self.key_idx_dict[this_sub] = [ i for i in range(n)]

datasets/helper/image_transform.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import cv2
+from torchvision import transforms
+import numpy as np
+import torch
+def re_normalize(image_tensor, old='[-1,1]', new='imagenet'):
+	"""
+	Re-normalizes an image tensor from one normalization scheme to another.
+	Args:
+		image_tensor (torch.Tensor): Image tensor to be re-normalized.
+		old (str): Old normalization scheme. Options: '[-1,1]', 'imagenet'.
+		new (str): New normalization scheme. Options: '[-1,1]', 'imagenet'.
+	Returns:
+		torch.Tensor: Re-normalized image tensor.
+	"""
+	# Old normalization parameters
+	device = image_tensor.device
+	if old == '[-1,1]':
+		old_mean = torch.tensor([0.5, 0.5, 0.5]).view(1, 3, 1, 1).to(device)
+		old_std = torch.tensor([0.5, 0.5, 0.5]).view(1, 3, 1, 1).to(device)
+	elif old == 'imagenet':
+		old_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
+		old_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
+	elif old == '[0,1]':
+		old_mean = torch.tensor([0.0, 0.0, 0.0]).view(1, 3, 1, 1).to(device)
+		old_std = torch.tensor([1.0, 1.0, 1.0]).view(1, 3, 1, 1).to(device)
+	else:
+		print('old normalization not implemented')
+		raise NotImplementedError
+	# New normalization parameters
+	if new == '[-1,1]':
+		new_mean = torch.tensor([0.5, 0.5, 0.5]).view(1, 3, 1, 1).to(device)
+		new_std = torch.tensor([0.5, 0.5, 0.5]).view(1, 3, 1, 1).to(device)
+	elif new == 'imagenet':
+		new_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
+		new_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
+	elif new == '[0,1]':
+		new_mean = torch.tensor([0.0, 0.0, 0.0]).view(1, 3, 1, 1).to(device)
+		new_std = torch.tensor([1.0, 1.0, 1.0]).view(1, 3, 1, 1).to(device)
+	else:
+		print('new normalization not implemented')
+		raise NotImplementedError
+	# Step 1: Denormalize the image tensor using the old mean and std
+	denormalized_image = image_tensor * old_std + old_mean
+	# Step 2: Normalize the image tensor using the new mean and std
+	normalized_image = (denormalized_image - new_mean) / new_std
+	return normalized_image
+def wrap_transforms(image_transforms_type, image_size):
+	if image_transforms_type == 'basic_imagenet':
+		MEAN = [0.485, 0.456, 0.406]
+		STD = [0.229, 0.224, 0.225]
+		return transforms.Compose([
+				transforms.ToPILImage(),
+				transforms.ToTensor(),
+				transforms.Normalize(mean=MEAN, std=STD)
+			])
+	else:
+		raise NotImplementedError
+# def enhance_contrast_clahe(image):
+# 	clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+# 	lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
+# 	lab_planes = list( cv2.split(lab) )
+# 	lab_planes[0] = clahe.apply(lab_planes[0])
+# 	lab = cv2.merge(lab_planes)
+# 	image = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
+# 	return image

datasets/mpiigaze.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import os
+import numpy as np
+import h5py
+import cv2
+from torch.utils.data import Dataset
+from typing import List
+from omegaconf import OmegaConf, listconfig
+from .helper.image_transform import wrap_transforms
+class MPIIGazeDataset(Dataset):
+	def __init__(self,
+				dataset_path: str,
+				color_type,
+				keys_to_use: List[str] = None,
+				data_name=None,
+				image_size:int=224,  ## <---
+				transform_type='basic_imagenet', ## <--- modified
+				image_key='face_patch',
+				gaze_key='face_gaze',
+				):
+		self.dataset_path = dataset_path
+		self.hdfs = {}
+		self.data_name = data_name
+		self.image_key = image_key
+		self.gaze_key = gaze_key
+		self.image_size = (image_size, image_size)
+		assert color_type in ['rgb', 'bgr']
+		self.color_type = color_type
+		self.transform = wrap_transforms(transform_type, image_size=image_size)
+		self.selected_keys = [k for k in keys_to_use]
+		assert len(self.selected_keys) > 0
+		self.file_paths = [os.path.join(self.dataset_path, k) for k in self.selected_keys]
+		for num_i in range(0, len(self.selected_keys)):
+			file_path = os.path.join(self.dataset_path, self.selected_keys[num_i]) # the subdirectories: train, test are not used in MPIIFaceGaze and MPII_Rotate
+			self.hdfs[num_i] = h5py.File(file_path, 'r', swmr=True)
+			print('read file: ', os.path.join(self.dataset_path, self.selected_keys[num_i]))
+			assert self.hdfs[num_i].swmr_mode
+		self.build_idx_to_kv()
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+		self.__hdfs = None
+		self.hdf = None
+	def __len__(self):
+		return len(self.idx_to_kv)
+	def __del__(self):
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+	def build_idx_to_kv(self):
+		self.idx_to_kv = []
+		self.key_idx_dict = {}
+		for num_i in range(0, len(self.selected_keys)):
+			p_key = self.selected_keys[num_i].split('.')[0]  ##p00
+			n = self.hdfs[num_i][self.image_key].shape[0]
+			self.idx_to_kv += [(num_i, i) for i in range(n)]
+			self.key_idx_dict[p_key] = [i for i in range(n)]
+	@property
+	def archives(self):
+		if self.__hdfs is None: # lazy loading here!
+			self.__hdfs = [h5py.File(h5_path, "r", swmr=True) for h5_path in self.file_paths]
+		return self.__hdfs
+	def preprocess_image(self, image):
+		image = image.astype(np.float32)
+		if self.color_type == 'bgr':
+			image = image[..., ::-1]
+		if image.shape[0] != self.image_size[0] or image.shape[1] != self.image_size[1]:
+			image = cv2.resize(image, self.image_size, interpolation=cv2.INTER_AREA)
+		image = self.transform(image.astype(np.uint8)		)
+		return image
+	def __getitem__(self, index):
+		key, idx = self.idx_to_kv[index]
+		self.hdf = self.archives[key]
+		# self.hdf = h5py.File(os.path.join(self.dataset_path, self.selected_keys[key]), 'r', swmr=True)
+		assert self.hdf.swmr_mode
+		image = self.hdf[self.image_key][idx, :]
+		gaze_label = self.hdf[self.gaze_key][idx].astype('float') if self.gaze_key in self.hdf else np.array([0,0]).astype('float')
+		head_label = self.hdf['face_head_pose'][idx].astype('float') if 'face_head_pose' in self.hdf else np.array([0,0]).astype('float')
+		entry = {
+			'image': self.preprocess_image(image),
+			'gaze': gaze_label,
+			'head': head_label,
+			'key': key,
+			'index':index
+		}
+		return entry

datasets/xgaze.py ADDED Viewed

	@@ -0,0 +1,137 @@

+import os,random
+import numpy as np
+import h5py
+import cv2
+from typing import List
+from torch.utils.data import Dataset
+from .helper.image_transform import wrap_transforms
+class XGazeDataset(Dataset):
+	def __init__(self,
+				dataset_path: str,
+				color_type,
+				images_per_frame,
+				keys_to_use: List[str] = None,
+				data_name=None,
+				image_size:int=224,
+				transform_type='basic_imagenet', ## <--- modified
+				image_key='face_patch',
+				gaze_key='face_gaze',
+				camera_random=None,
+				frame_tag=[0,1000],
+				seed=0,
+				):
+		self.path = dataset_path
+		self.hdfs = {}
+		self.data_name = data_name
+		self.images_per_frame = images_per_frame
+		print('images_per_frame: ', images_per_frame)
+		self.image_key = image_key
+		self.gaze_key = gaze_key
+		self.image_size = (image_size, image_size)
+		random.seed(seed)
+		assert color_type in ['rgb', 'bgr']
+		self.color_type = color_type
+		self.cameras_idx = list(range(self.images_per_frame))
+		self.camera_random = camera_random
+		#### -------------------------------------------------------- read the h5 files -------------------------------------------------------
+		self.selected_keys = [k for k in keys_to_use]
+		assert len(self.selected_keys) > 0
+		self.file_paths = [os.path.join(self.path, k) for k in self.selected_keys]
+		for num_i in range(0, len(self.selected_keys)):
+			file_path = os.path.join(self.path, self.selected_keys[num_i]) # the subdirectories: train, test are not used in MPIIFaceGaze and MPII_Rotate
+			self.hdfs[num_i] = h5py.File(file_path, 'r', swmr=True)
+			print('read file: ', os.path.join(self.path, self.selected_keys[num_i]))
+			assert self.hdfs[num_i].swmr_mode
+		####-----------------------------------------------------------------------------------------------------------------------------------
+		self.idx_to_kv = []
+		self.key_idx_dict = {} ## this is for reading the second sample from the same person
+		for num_i in range(0, len(self.selected_keys)):
+			this_sub = self.selected_keys[num_i].split('.')[0]
+			n = self.hdfs[num_i][image_key].shape[0]
+			if type(frame_tag) == list:
+				self.start_frame, self.end_frame = frame_tag
+			elif frame_tag == 'all':
+				self.start_frame, self.end_frame = 0, 10000
+			else:
+				raise ValueError("frame_tag should be either a list of integers or str 'all' ")
+			start_idx = min(n, self.start_frame * self.images_per_frame)
+			end_idx =  min(n, self.end_frame  * self.images_per_frame)
+			if self.camera_random is None:
+				self.idx_to_kv +=  [(num_i, i) for i in range(start_idx, end_idx) if (i % self.images_per_frame ) in self.cameras_idx ]
+				self.key_idx_dict[this_sub] = [ i for i in range(start_idx, end_idx) if (i % self.images_per_frame ) in self.cameras_idx ]
+			else:
+				for frame in range(start_idx // self.images_per_frame, end_idx // self.images_per_frame):
+					frame_start_idx = frame * self.images_per_frame
+					frame_end_idx = frame_start_idx + self.images_per_frame
+					# Randomly select self.images_per_frame camera indices for this frame
+					random_cameras_idx = random.sample(range(self.images_per_frame), self.camera_random)
+					self.idx_to_kv += [(num_i, i) for i in range(frame_start_idx, frame_end_idx) if (i % self.images_per_frame) in random_cameras_idx]
+					self.key_idx_dict.setdefault(this_sub, []).extend(
+						[i for i in range(frame_start_idx, frame_end_idx) if (i % self.images_per_frame) in random_cameras_idx]
+					)
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+		self.transform = wrap_transforms(transform_type, image_size=image_size)
+		self.__hdfs = None
+		self.hdf = None
+	def __len__(self):
+		return len(self.idx_to_kv)
+	def __del__(self):
+		for num_i in range(0, len(self.hdfs)):
+			if self.hdfs[num_i]:
+				self.hdfs[num_i].close()
+				self.hdfs[num_i] = None
+	@property
+	def archives(self):
+		if self.__hdfs is None: # lazy loading here!
+			self.__hdfs = [h5py.File(h5_path, "r", swmr=True) for h5_path in self.file_paths]
+		return self.__hdfs
+	def preprocess_image(self, image):
+		image = image.astype(np.float32)
+		if self.color_type == 'bgr':
+			image = image[..., ::-1]
+		if image.shape[0] != self.image_size[0] or image.shape[1] != self.image_size[1]:
+			image = cv2.resize(image, self.image_size, interpolation=cv2.INTER_AREA)
+		image = self.transform( image.astype(np.uint8) )
+		return image
+	def __getitem__(self, index):
+		key, idx = self.idx_to_kv[index]
+		self.hdf = self.archives[key]
+		assert self.hdf.swmr_mode
+		image = self.hdf[self.image_key][idx, :]
+		gaze_label = self.hdf[self.gaze_key][idx].astype('float') if self.gaze_key in self.hdf else np.array([0,0]).astype('float')
+		head_label = self.hdf['face_head_pose'][idx].astype('float') if 'face_head_pose' in self.hdf else np.array([0,0]).astype('float')
+		entry = {
+			'image': self.preprocess_image(image),
+			'gaze': gaze_label,
+			'head': head_label,
+			'key': key,
+			'index':index
+		}
+		return entry

examples/De_Nachtwacht.png ADDED Viewed

Git LFS Details

SHA256: f95e98d7e7a725599ae1b3f7f86978834aba8773806947a94378902540b07d58
Pointer size: 133 Bytes
Size of remote file: 12.2 MB

examples/The_Night_Watch_Frans_Banninck_Cocq.png ADDED Viewed

Git LFS Details

SHA256: 3468d4cf328e965a68e797cad000b7d3007a40fc1a5fb4d9b15620cea184ad7c
Pointer size: 131 Bytes
Size of remote file: 591 kB

gazelib/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

gazelib/draw/__init__.py ADDED Viewed

File without changes

gazelib/draw/draw_image.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import cv2
+import torch
+import numpy as np
+def recover_image( image_tensor, MEAN=[0.5, 0.5, 0.5], STD=[0.5, 0.5, 0.5]):
+	"""
+	read a tensor and recover it to image in cv2 format
+	args:
+		image_tensor: [C, H, W] or [B, C, H, W]
+	return:
+		image_save: [B, H, W, C]
+	"""
+	if image_tensor.ndim == 3:
+		image_tensor = image_tensor.unsqueeze(0)
+	x = torch.mul(image_tensor, torch.FloatTensor(STD).view(3,1,1).to(image_tensor.device))
+	x = torch.add(x, torch.FloatTensor(MEAN).view(3,1,1).to(image_tensor.device) )
+	x = x.data.cpu().numpy()
+	# [C, H, W] -> [H, W, C]
+	image_rgb = np.transpose(x, (0, 2, 3, 1))
+	# RGB -> BGR
+	image_bgr = image_rgb[:, :, :, [2,1,0]]
+	# float -> int
+	image_save = np.clip(image_bgr*255, 0, 255).astype('uint8')
+	return image_save
+def draw_lm(image, landmarks, color= (0, 0, 255), radius = 20, print_idx=False):
+	i = 0
+	image_out = image.copy()
+	for x,y in landmarks:
+		# Radius of circle
+		# Line thickness of 2 px
+		thickness = -1
+		image_out = cv2.circle(image_out, (int(x), int(y)), radius, color, thickness)
+		if print_idx:
+			image_out = cv2.putText(image_out,
+				text=str(i),
+				org=(int(x), int(y)),
+				fontFace=cv2.FONT_HERSHEY_SIMPLEX,
+				fontScale=2.0,
+				color=color,
+				thickness=2,
+				lineType=cv2.LINE_4)
+		i += 1
+	return image_out
+def draw_gaze(image_in, pitchyaw, thickness=2, color=(0, 0, 255)):
+	"""Draw gaze angle on given image with a given eye positions."""
+	image_out = image_in.copy()
+	(h, w) = image_in.shape[:2]
+	length = w / 2.0
+	pos = (int(h / 2.0), int(w / 2.0))
+	if len(image_out.shape) == 2 or image_out.shape[2] == 1:
+		image_out = cv2.cvtColor(image_out, cv2.COLOR_GRAY2BGR)
+	dx = -length * np.sin(pitchyaw[1]) * np.cos(pitchyaw[0])
+	dy = -length * np.sin(pitchyaw[0])
+	cv2.arrowedLine(image_out, tuple(np.round(pos).astype(np.int32)),
+				   tuple(np.round([pos[0] + dx, pos[1] + dy]).astype(int)), color,
+				   thickness, cv2.LINE_AA, tipLength=0.2)
+	return image_out

gazelib/gaze/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .gaze_utils import pitchyaw_to_vector, vector_to_pitchyaw, angular_error

gazelib/gaze/gaze_utils.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import os
+import numpy as np
+import imageio
+import cv2
+import h5py
+import math
+import torch
def pitchyaw_to_vector(pitchyaws):
    r"""Convert pitch/yaw angle pairs to 3D gaze direction vectors.

    Dispatches on the container type to the numpy or torch implementation
    in this module; both read column 0 as pitch and column 1 as yaw.

    Args:
        pitchyaws: ``np.ndarray`` or ``torch.Tensor`` of angles, one pair per row.

    Returns:
        Whatever the selected implementation returns (one 3D vector per row).

    Raises:
        ValueError: If the input is neither a numpy array nor a torch tensor.
    """
    if isinstance(pitchyaws, torch.Tensor):
        return pitchyaw_to_vector_torch(pitchyaws)
    if isinstance(pitchyaws, np.ndarray):
        return pitchyaw_to_vector_numpy(pitchyaws)
    raise ValueError("Unsupported input type. Only numpy arrays and torch tensors are supported.")
def pitchyaw_to_vector_numpy(pitchyaws):
    """Convert an ``(n, 2)`` array of (pitch, yaw) angles to ``(n, 3)`` vectors.

    The components are ``x = cos(pitch) * sin(yaw)``, ``y = sin(pitch)`` and
    ``z = cos(pitch) * cos(yaw)``.  The result is always float64.
    """
    pitch, yaw = pitchyaws[:, 0], pitchyaws[:, 1]
    cos_pitch = np.cos(pitch)
    components = (
        cos_pitch * np.sin(yaw),
        np.sin(pitch),
        cos_pitch * np.cos(yaw),
    )
    return np.column_stack(components).astype(np.float64)
def pitchyaw_to_vector_torch(pitchyaws):
    """Convert an ``(n, 2)`` tensor of (pitch, yaw) angles to ``(n, 3)`` vectors.

    The components are ``x = cos(pitch) * sin(yaw)``, ``y = sin(pitch)`` and
    ``z = cos(pitch) * cos(yaw)``.

    The result lives on the input's device.  Its dtype is the input's
    floating dtype promoted with the default dtype, so float64 input is no
    longer silently truncated to float32, while lower-precision input is
    still widened to the default dtype as before.
    """
    sin = torch.sin(pitchyaws)
    cos = torch.cos(pitchyaws)
    out = torch.stack(
        (cos[:, 0] * sin[:, 1], sin[:, 0], cos[:, 0] * cos[:, 1]),
        dim=1,
    )
    return out.to(torch.promote_types(out.dtype, torch.get_default_dtype()))
def vector_to_pitchyaw(vectors):
    """Convert 3D gaze vectors to (pitch, yaw) angle pairs.

    Dispatches on the container type to the numpy or torch implementation
    in this module.

    Args:
        vectors: ``np.ndarray`` or ``torch.Tensor`` with one 3D vector per row.

    Returns:
        Whatever the selected implementation returns (pitch in column 0,
        yaw in column 1).

    Raises:
        ValueError: If the input is neither a numpy array nor a torch tensor.
    """
    if isinstance(vectors, torch.Tensor):
        return vector_to_pitchyaw_torch(vectors)
    if isinstance(vectors, np.ndarray):
        return vector_to_pitchyaw_numpy(vectors)
    raise ValueError("Unsupported input type. Only numpy arrays and torch tensors are supported.")
def vector_to_pitchyaw_numpy(vectors):
    """Convert an ``(n, 3)`` array of vectors to ``(n, 2)`` (pitch, yaw) angles.

    Each row is normalized to unit length first; then
    ``pitch = arcsin(y)`` and ``yaw = arctan2(x, z)``.  The result is float64.
    """
    unit = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    pitch = np.arcsin(unit[:, 1])
    yaw = np.arctan2(unit[:, 0], unit[:, 2])
    return np.column_stack((pitch, yaw)).astype(np.float64)
def vector_to_pitchyaw_torch(vectors):
    """Convert an ``(n, 3)`` tensor of vectors to ``(n, 2)`` (pitch, yaw) angles.

    Each row is normalized to unit length first; then
    ``pitch = asin(y)`` and ``yaw = atan2(x, z)``.

    The result lives on the input's device.  Its dtype is the input's dtype
    promoted with the default dtype, so float64 input keeps full precision
    instead of being written into a default-dtype buffer.
    """
    unit = vectors / torch.norm(vectors, dim=1, keepdim=True)
    out = torch.stack(
        (torch.asin(unit[:, 1]), torch.atan2(unit[:, 0], unit[:, 2])),
        dim=1,
    )
    return out.to(torch.promote_types(out.dtype, torch.get_default_dtype()))
def angular_error(a, b):
    """Return the angle in degrees between corresponding rows of ``a`` and ``b``.

    Both arguments must be numpy arrays, or both torch tensors; the call is
    forwarded to the matching implementation in this module.

    Raises:
        ValueError: If the two inputs are not of the same supported kind.
    """
    both_numpy = isinstance(a, np.ndarray) and isinstance(b, np.ndarray)
    if both_numpy:
        return angular_error_numpy(a, b)
    both_torch = isinstance(a, torch.Tensor) and isinstance(b, torch.Tensor)
    if both_torch:
        return angular_error_torch(a, b)
    raise ValueError("Input type mismatch. Both inputs should be either numpy arrays or torch tensors.")
def angular_error_numpy(a, b):
    """Return the row-wise angle in degrees between two numpy arrays.

    Args:
        a, b: Arrays with one entry per row.  Rows with 2 columns are treated
            as (pitch, yaw) and converted with ``pitchyaw_to_vector``; any
            other width is used as a direction vector as-is.

    Returns:
        1-D array of angles in degrees, in ``[0, 180]``.
    """
    a = pitchyaw_to_vector(a) if a.shape[1] == 2 else a
    b = pitchyaw_to_vector(b) if b.shape[1] == 2 else b
    dot = np.sum(np.multiply(a, b), axis=1)
    # Floor the norms so a zero vector does not cause a division by zero.
    a_norm = np.clip(np.linalg.norm(a, axis=1), 1e-7, None)
    b_norm = np.clip(np.linalg.norm(b, axis=1), 1e-7, None)
    similarity = np.divide(dot, np.multiply(a_norm, b_norm))
    # Rounding can push the cosine marginally outside [-1, 1], for which
    # arccos returns NaN; clip it back into the valid domain.
    similarity = np.clip(similarity, -1.0, 1.0)
    return np.arccos(similarity) * 180.0 / np.pi
def angular_error_torch(a, b):
    """Return the row-wise angle in degrees between two torch tensors.

    Args:
        a, b: Tensors with one entry per row.  Rows with 2 columns are treated
            as (pitch, yaw) and converted with ``pitchyaw_to_vector``; any
            other width is used as a direction vector as-is.

    Returns:
        1-D tensor of angles in degrees, in ``[0, 180]``.
    """
    a = pitchyaw_to_vector(a) if a.size()[1] == 2 else a
    b = pitchyaw_to_vector(b) if b.size()[1] == 2 else b
    dot = torch.sum(a * b, dim=1)
    # Floor the norms so a zero vector does not cause a division by zero.
    a_norm = torch.clamp(torch.norm(a, dim=1), min=1e-7)
    b_norm = torch.clamp(torch.norm(b, dim=1), min=1e-7)
    similarity = dot / (a_norm * b_norm)
    # Rounding can push the cosine marginally outside [-1, 1], for which
    # acos returns NaN; clamp it back into the valid domain.
    similarity = torch.clamp(similarity, min=-1.0, max=1.0)
    return torch.acos(similarity) * 180.0 / np.pi
def cos_similarity(a, b):
    """Return the row-wise cosine similarity of ``a`` and ``b``.

    Both arguments must be numpy arrays, or both torch tensors; the call is
    forwarded to the matching implementation in this module.

    Raises:
        ValueError: If the two inputs are not of the same supported kind.
    """
    both_numpy = isinstance(a, np.ndarray) and isinstance(b, np.ndarray)
    if both_numpy:
        return cos_similarity_numpy(a, b)
    both_torch = isinstance(a, torch.Tensor) and isinstance(b, torch.Tensor)
    if both_torch:
        return cos_similarity_torch(a, b)
    raise ValueError("Input type mismatch. Both inputs should be either numpy arrays or torch tensors.")
def cos_similarity_numpy(a, b):
    """Return the row-wise cosine similarity of two numpy arrays, clipped to [0, 1].

    Args:
        a, b: Arrays with one entry per row.  Rows with 2 columns are treated
            as (pitch, yaw) and converted with ``pitchyaw_to_vector``; any
            other width is used as a direction vector as-is.

    Returns:
        1-D array of similarities; negative cosines are reported as 0.
    """
    a = pitchyaw_to_vector(a) if a.shape[1] == 2 else a
    b = pitchyaw_to_vector(b) if b.shape[1] == 2 else b
    dot = np.sum(np.multiply(a, b), axis=1)
    # Floor the norms so a zero vector does not cause a division by zero.
    a_norm = np.clip(np.linalg.norm(a, axis=1), 1e-7, None)
    b_norm = np.clip(np.linalg.norm(b, axis=1), 1e-7, None)
    similarity = np.divide(dot, np.multiply(a_norm, b_norm))
    # Bounds are passed positionally: the ``min=``/``max=`` keywords are not
    # accepted by every NumPy release, whereas positional bounds always are.
    return np.clip(similarity, 0., 1.)
def cos_similarity_torch(a, b):
    """Return the row-wise cosine similarity of two torch tensors, clamped to [0, 1].

    Rows with 2 columns are treated as (pitch, yaw) and converted with
    ``pitchyaw_to_vector``; any other width is used as a vector as-is.
    Negative cosines are reported as 0.
    """
    if a.size()[1] == 2:
        a = pitchyaw_to_vector(a)
    if b.size()[1] == 2:
        b = pitchyaw_to_vector(b)
    dot = torch.sum(a * b, dim=1)
    # Floor the norms so a zero vector does not cause a division by zero.
    norm_a = torch.clamp(torch.norm(a, dim=1), min=1e-7)
    norm_b = torch.clamp(torch.norm(b, dim=1), min=1e-7)
    return torch.clamp(dot / (norm_a * norm_b), min=0., max=1.)

gazelib/gaze/normalize.py ADDED Viewed

	@@ -0,0 +1,266 @@

+# -*- coding: utf-8 -*-
+"""
+######################################################################################################################################
+This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy of this license,
+visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
+Any publications arising from the use of this software, including but
+not limited to academic journal and conference publications, technical
+reports and manuals, must cite at least one of the following works:
+Revisiting Data Normalization for Appearance-Based Gaze Estimation
+Xucong Zhang, Yusuke Sugano, Andreas Bulling
+in Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2018
+######################################################################################################################################
+"""
+import os
+import cv2
+import numpy as np
+import csv
+import argparse
+# import dlib
+import glob
def normalize_woimg(landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
    """Data normalization without warping an image.

    Computes the rotation that makes a virtual camera look straight at
    ``center`` and the matching perspective transform, then applies them to
    the head rotation, the 2D landmarks and (optionally) the gaze target.

    Args:
        landmarks: 2D landmark array; reshaped to ``(-1, 1, 2)`` for warping.
        focal_norm: Focal length of the normalized camera.
        distance_norm: Distance at which the normalized camera is placed.
        roi_size: ``(width, height)`` of the normalized image.
        center: 3D reference point in camera coordinates (3 values).
        hr: Head rotation vector (converted with ``cv2.Rodrigues``).
        ht: Head translation; accepted for interface symmetry, not used here.
        cam: 3x3 intrinsic matrix of the original camera.
        gc: Optional 3D gaze target in camera coordinates.

    Returns:
        ``[None, R, hR_norm, gc_normalized, landmarks_warped, W]`` where the
        leading ``None`` stands in for the warped image, ``gc_normalized`` is
        a unit ``(3, 1)`` vector or ``None`` when ``gc`` is not given.
    """
    center = center.reshape(3, 1)
    head_rotation = cv2.Rodrigues(hr)[0]  # rotation vector -> 3x3 matrix

    # Virtual camera intrinsics and the depth scaling towards distance_norm.
    distance = np.linalg.norm(center)
    cam_norm = np.array([
        [focal_norm, 0, roi_size[0]/2],
        [0, focal_norm, roi_size[1]/2],
        [0, 0, 1.0],
    ])
    scaling = np.diag([1.0, 1.0, distance_norm/distance])

    # Rotation whose z axis points at `center` and whose x axis stays in the
    # plane spanned by the head's x axis and that viewing direction.
    forward = (center/distance).reshape(3)
    down = np.cross(forward, head_rotation[:, 0])
    down = down / np.linalg.norm(down)
    right = np.cross(down, forward)
    right = right / np.linalg.norm(right)
    R = np.c_[right, down, forward].T

    # Image-space transform from the original to the normalized camera.
    W = np.dot(np.dot(cam_norm, scaling), np.dot(R, np.linalg.inv(cam)))

    hR_norm = np.dot(R, head_rotation)  # head rotation in normalized space

    num_point = landmarks.shape[0]
    points = landmarks.reshape(-1, 1, 2).astype('float32')
    landmarks_warped = cv2.perspectiveTransform(points, W).reshape(num_point, 2)

    gc_normalized = None
    if gc is not None:
        # Modified normalization: the gaze direction is only rotated by R,
        # the scaling matrix is deliberately not applied to it.
        gaze = np.dot(R, gc.reshape((3, 1)) - center)
        gc_normalized = gaze/np.linalg.norm(gaze)
    return [None, R, hR_norm, gc_normalized, landmarks_warped, W]
def normalize(img, landmarks, focal_norm, distance_norm, roi_size, center, hr, ht, cam, gc=None):
    """Data normalization of an image together with its labels.

    Computes the rotation that makes a virtual camera look straight at
    ``center`` and the matching perspective transform, then applies them to
    the image, the head rotation, the 2D landmarks and (optionally) the
    gaze target.

    Args:
        img: Input image, warped with ``cv2.warpPerspective``.
        landmarks: 2D landmark array; reshaped to ``(-1, 1, 2)`` for warping.
        focal_norm: Focal length of the normalized camera.
        distance_norm: Distance at which the normalized camera is placed.
        roi_size: ``(width, height)`` of the normalized image.
        center: 3D reference point in camera coordinates (3 values).
        hr: Head rotation vector (converted with ``cv2.Rodrigues``).
        ht: Head translation; accepted for interface symmetry, not used here.
        cam: 3x3 intrinsic matrix of the original camera.
        gc: Optional 3D gaze target in camera coordinates.

    Returns:
        ``[img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]``;
        ``gc_normalized`` is a unit ``(3, 1)`` vector or ``None`` when ``gc``
        is not given.
    """
    center = center.reshape(3, 1)
    head_rotation = cv2.Rodrigues(hr)[0]  # rotation vector -> 3x3 matrix

    # Virtual camera intrinsics and the depth scaling towards distance_norm.
    distance = np.linalg.norm(center)
    cam_norm = np.array([
        [focal_norm, 0, roi_size[0]/2],
        [0, focal_norm, roi_size[1]/2],
        [0, 0, 1.0],
    ])
    scaling = np.diag([1.0, 1.0, distance_norm/distance])

    # Rotation whose z axis points at `center` and whose x axis stays in the
    # plane spanned by the head's x axis and that viewing direction.
    forward = (center/distance).reshape(3)
    down = np.cross(forward, head_rotation[:, 0])
    down = down / np.linalg.norm(down)
    right = np.cross(down, forward)
    right = right / np.linalg.norm(right)
    R = np.c_[right, down, forward].T

    # Image-space transform from the original to the normalized camera.
    W = np.dot(np.dot(cam_norm, scaling), np.dot(R, np.linalg.inv(cam)))
    img_warped = cv2.warpPerspective(img, W, roi_size)

    hR_norm = np.dot(R, head_rotation)  # head rotation in normalized space

    num_point = landmarks.shape[0]
    points = landmarks.reshape(-1, 1, 2).astype('float32')
    landmarks_warped = cv2.perspectiveTransform(points, W).reshape(num_point, 2)

    gc_normalized = None
    if gc is not None:
        # Modified normalization: the gaze direction is only rotated by R,
        # the scaling matrix is deliberately not applied to it.
        gaze = np.dot(R, gc.reshape((3, 1)) - center)
        gc_normalized = gaze/np.linalg.norm(gaze)
    return [img_warped, R, hR_norm, gc_normalized, landmarks_warped, W]
def normalize_face(img, face, hr, ht, cam, gc=None, landmarks=None):
    """Normalize a full-face image with fixed normalized-camera settings.

    The previous body called ``normalize`` without its ``landmarks``
    argument, which shifted every positional argument by one and made the
    call fail.  ``landmarks`` is now passed explicitly.

    Args:
        img: Input image.
        face: 3xN face model (it is rotated with ``hR`` and offset by ``ht``);
            columns 0-3 and 4-5 are averaged separately for the face centre.
        hr: Head rotation vector.
        ht: Head translation (3 values).
        cam: 3x3 intrinsic matrix of the original camera.
        gc: Optional 3D gaze target in camera coordinates.
        landmarks: Optional 2D landmarks to warp along with the image.  When
            omitted, the posed model points are projected through ``cam`` and
            used instead (assumes an undistorted image -- TODO confirm this
            fallback matches the intended use).

    Returns:
        The list returned by ``normalize``.
    """
    ## normalized camera parameters
    focal_norm = 960  # focal length of normalized camera
    distance_norm = 600  # normalized distance between face and camera
    roi_size = (224, 224)  # size of the normalized face image

    ## compute estimated 3D positions of the landmarks
    ht = ht.reshape((3, 1))
    hR = cv2.Rodrigues(hr)[0]  # rotation matrix
    Fc = np.dot(hR, face) + ht  # 3D positions of facial landmarks

    # Face centre: midpoint between the mean of columns 0-3 and the mean of
    # columns 4-5 (named eye / nose centres in this code base).
    two_eye_center = np.mean(Fc[:, 0:4], axis=1).reshape((3, 1))
    nose_center = np.mean(Fc[:, 4:6], axis=1).reshape((3, 1))
    face_center = np.mean(np.concatenate((two_eye_center, nose_center), axis=1), axis=1).reshape((3, 1))

    if landmarks is None:
        projected = np.dot(cam, Fc)
        landmarks = (projected[:2] / projected[2]).T

    return normalize(img, landmarks, focal_norm, distance_norm, roi_size, face_center, hr, ht, cam, gc)
def normalize_eye(img, face, hr, ht, cam, gc=None, landmarks=None):
    """Normalize both eye regions with fixed normalized-camera settings.

    The previous body called ``normalize`` without its ``landmarks``
    argument, which shifted every positional argument by one and made the
    call fail.  ``landmarks`` is now passed explicitly.

    Args:
        img: Input image.
        face: 3xN face model; columns 0-1 and 2-3 are averaged to get the two
            eye centres.
        hr: Head rotation vector.
        ht: Head translation (3 values).
        cam: 3x3 intrinsic matrix of the original camera.
        gc: Optional 3D gaze target in camera coordinates.
        landmarks: Optional 2D landmarks to warp along with the image.  When
            omitted, the posed model points are projected through ``cam`` and
            used instead (assumes an undistorted image -- TODO confirm this
            fallback matches the intended use).

    Returns:
        A two-element list with the ``normalize`` result for the eye built
        from columns 0-1 first, then the eye built from columns 2-3.
    """
    ## normalized camera parameters
    focal_norm = 960  # focal length of normalized camera
    distance_norm = 600  # normalized distance between eye and camera
    roi_size = (60, 36)  # size of cropped eye image

    ## compute estimated 3D positions of the landmarks
    ht = ht.reshape((3, 1))
    hR = cv2.Rodrigues(hr)[0]  # rotation matrix
    Fc = np.dot(hR, face) + ht  # 3D positions of facial landmarks
    # presumably `re` is the right eye and `le` the left eye (the old comments
    # had them the other way round) -- verify against the face model layout
    re = 0.5*(Fc[:, 0] + Fc[:, 1]).reshape((3, 1))
    le = 0.5*(Fc[:, 2] + Fc[:, 3]).reshape((3, 1))

    if landmarks is None:
        projected = np.dot(cam, Fc)
        landmarks = (projected[:2] / projected[2]).T

    ## normalize each eye
    return [
        normalize(img, landmarks, focal_norm, distance_norm, roi_size, eye, hr, ht, cam, gc)
        for eye in (re, le)
    ]
def load_calibration(calib_path):
    """Load camera intrinsics and distortion coefficients from an OpenCV file.

    Reads the ``camera_matrix`` and ``dist_coeffs`` nodes of the file
    storage at ``calib_path`` (as written by OpenCV's calibration tools).

    Returns:
        Tuple ``(camera_matrix, camera_distortion)``.
    """
    fs = cv2.FileStorage(calib_path, cv2.FILE_STORAGE_READ)
    try:
        camera_matrix = fs.getNode('camera_matrix').mat()
        camera_distortion = fs.getNode('dist_coeffs').mat()
    finally:
        # Close the underlying file even if reading a node fails.
        fs.release()
    return camera_matrix, camera_distortion
def load_facemodel(model_path):
    """Load the generic face model from an OpenCV file storage.

    Reads the ``face_model`` node of the file at ``model_path``.  According
    to the original comment the model holds 6 facial landmarks: four eye
    corners and two mouth corners.

    Returns:
        The ``face_model`` matrix.
    """
    fs = cv2.FileStorage(model_path, cv2.FILE_STORAGE_READ)
    try:
        face_model = fs.getNode('face_model').mat()
    finally:
        # Close the underlying file even if reading the node fails.
        fs.release()
    return face_model
def read_image(img_path, camera_matrix, camera_distortion):
    """Load an image from disk and undistort it.

    Args:
        img_path: Path of the image file.
        camera_matrix: Camera intrinsic matrix passed to ``cv2.undistort``.
        camera_distortion: Distortion coefficients passed to ``cv2.undistort``.

    Returns:
        The undistorted image.

    Raises:
        IOError: If the image cannot be read.
    """
    img_original = cv2.imread(img_path)
    if img_original is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of an opaque error inside cv2.undistort.
        raise IOError('could not read image: {}'.format(img_path))
    return cv2.undistort(img_original, camera_matrix, camera_distortion)
def estimateHeadPose(landmarks, face_model, camera, distortion, iterate=True):
    """Estimate the head pose from 2D landmarks and a 3D face model.

    An EPnP solve gives the initial pose.  When ``iterate`` is true, a second
    ``cv2.solvePnP`` call with its default solver refines it, using the first
    result as the extrinsic guess.

    Returns:
        Tuple ``(rvec, tvec)`` as returned by ``cv2.solvePnP``.
    """
    _, rvec, tvec = cv2.solvePnP(
        face_model, landmarks, camera, distortion, flags=cv2.SOLVEPNP_EPNP)
    if not iterate:
        return rvec, tvec
    ## further optimize, starting from the EPnP estimate
    _, rvec, tvec = cv2.solvePnP(
        face_model, landmarks, camera, distortion,
        rvec=rvec, tvec=tvec, useExtrinsicGuess=True)
    return rvec, tvec
def detect_landmark(img, detector_path, predictor_path):
    """Detect six facial keypoints on the first face that dlib finds.

    NOTE(review): this function uses ``dlib``, but the module-level
    ``import dlib`` is commented out in this file, so calling it raises
    ``NameError`` unless ``dlib`` is made available in the module namespace.

    Args:
        img: Input image handed to the dlib detector and shape predictor.
        detector_path: Path of the dlib CNN face detector model.
        predictor_path: Path of the dlib shape predictor model.

    Returns:
        ``(6, 2)`` array with the (x, y) of shape parts 36, 39, 42, 45, 48
        and 54, or ``None`` when no face is detected.
    """
    ## obtain facial landmarks using dlib
    detector = dlib.cnn_face_detection_model_v1(detector_path)
    dets = detector(img, 0)
    if len(dets) == 0:
        return None
    predictor = dlib.shape_predictor(predictor_path)
    shape = predictor(img, dets[0].rect)
    ## extract required keypoints
    keypoint_ids = (36, 39, 42, 45, 48, 54)
    return np.array([[shape.part(i).x, shape.part(i).y] for i in keypoint_ids])
def read_landmark(img_path):
    """Look up the six annotated landmarks of an image in its person file.

    ``img_path`` is expected to end in ``<person>/<day>/<image>``.  The
    annotation file ``<person>/<person>.txt`` holds one whitespace-separated
    record per line, keyed by ``<day>/<image>``; fields 3 to 14 of the
    matching record (counting the key as field 0) are the 12 landmark
    coordinates.

    Returns:
        Integer array of shape ``(6, 2)``.
    """
    parts = img_path.split(os.path.sep)
    img_file, day, person = parts[-1], parts[-2], parts[-3]
    person_dir = os.path.dirname(os.path.dirname(img_path))
    person_txt = os.path.join(person_dir, person+'.txt')
    index = os.path.join(day, img_file)
    print(person_txt)
    print(index)
    with open(person_txt) as f:
        lines = f.readlines()
    # Only the first CSV field of every line is parsed; it is split on
    # whitespace into the record key followed by its values.
    records = (row[0].split() for row in csv.reader(lines))
    table = {words[0]: words[1:] for words in records}
    return np.array(list(map(int, table[index][2:14]))).reshape((6, 2))
+# def process_image(img_path, detector_path, predictor_path, camera_matrix, camera_distortion, face_model, gc=None):
+#     # read input image
+#     img = read_image(img_path, camera_matrix, camera_distortion)
+#     # detect facial landmarks
+#     landmarks = detect_landmark(img, detector_path, predictor_path)
+#     if landmarks is not None:
+#         # estimate head pose
+#         hr, ht = estimateHeadPose(face_model, landmarks, camera_matrix, camera_distortion)
+#         # data normalization for left and right eye image
+#         normalized_eyes = normalize_eye(img, face_model, hr, ht, camera_matrix, gc)
+#         # data normalization for full face
+#         normalized_face = normalize_face(img, face_model, hr, ht, camera_matrix, gc)
+#         # return a list of [reye, leye, face]
+#         return normalized_eyes + [normalized_face]

gazelib/label_transform.py ADDED Viewed

	@@ -0,0 +1,195 @@

+import cv2
+import numpy as np
def get_eye_nose_landmarks(landmarks):
    """Select the four eye and two nose landmarks from a 50- or 68-point set.

    Returns the rows ``[20, 23, 26, 29, 15, 19]`` of a 50-point array or
    ``[36, 39, 42, 45, 31, 35]`` of a 68-point array, in that order.
    """
    num_points = landmarks.shape[0]
    assert num_points==50 or num_points==68
    if num_points == 68:
        rows = [36, 39, 42, 45, 31, 35]
    elif num_points == 50:
        rows = [20, 23, 26, 29, 15, 19]
    return landmarks[rows, :]
def get_eye_mouth_landmarks(landmarks):
    """Select the four eye and two mouth landmarks from a 50- or 68-point set.

    Returns the rows ``[20, 23, 26, 29, 32, 38]`` of a 50-point array or
    ``[36, 39, 42, 45, 48, 54]`` of a 68-point array, in that order.
    """
    num_points = landmarks.shape[0]
    assert num_points==50 or num_points==68
    if num_points == 68:
        rows = [36, 39, 42, 45, 48, 54]
    elif num_points == 50:
        rows = [20, 23, 26, 29, 32, 38]
    return landmarks[rows, :]
def mean_eye_nose(landmarks):
    """Return the face centre of six landmarks (4 eye rows, then 2 nose rows).

    The centre is the midpoint between the mean of rows 0-3 and the mean of
    rows 4-5, returned with shape ``(1, D)``.
    """
    assert landmarks.shape[0]==6
    eye_mean = np.mean(landmarks[:4, :], axis=0).reshape(1, -1)
    nose_mean = np.mean(landmarks[4:, :], axis=0).reshape(1, -1)
    both = np.concatenate((eye_mean, nose_mean), axis=0)
    return np.mean(both, axis=0).reshape(1, -1)
def mean_eye_mouth(landmarks):
    """Return the plain mean of six landmarks as a ``(1, D)`` array."""
    assert landmarks.shape[0]==6
    centre = np.mean(landmarks, axis=0)
    return centre.reshape(1, -1)
def get_face_center_by_nose(hR, ht, face_model_load):
    """Pose the eye+nose model points and return the face centre.

    Args:
        hR: 3x3 head rotation matrix.
        ht: Head translation, broadcast against the ``(3, 6)`` posed points.
        face_model_load: 50- or 68-point face model (one point per row).

    Returns:
        Tuple ``(face_center, Fc)``: the ``(3, 1)`` centre computed by
        ``mean_eye_nose`` and the ``(3, 6)`` posed eye/nose points.
    """
    model_points = get_eye_nose_landmarks(face_model_load)
    posed = np.dot(hR, model_points.T) + ht  # 3D positions of the landmarks
    centre = mean_eye_nose(posed.T).reshape((3, 1))
    return centre, posed
def get_face_center_by_mouth(hR, ht, face_model_load):
    """Pose the eye+mouth model points and return the face centre.

    Args:
        hR: 3x3 head rotation matrix.
        ht: Head translation, broadcast against the ``(3, 6)`` posed points.
        face_model_load: 50- or 68-point face model (one point per row).

    Returns:
        Tuple ``(face_center, Fc)``: the ``(3, 1)`` centre computed by
        ``mean_eye_mouth`` and the ``(3, 6)`` posed eye/mouth points.
    """
    model_points = get_eye_mouth_landmarks(face_model_load)
    posed = np.dot(hR, model_points.T) + ht  # 3D positions of the landmarks
    centre = mean_eye_mouth(posed.T).reshape((3, 1))
    return centre, posed
def lm68_to_50(lm_68):
    '''
    Reduce a (68, 2) landmark set to 50 points.

    Output row 0 is input row 8; output rows 1-43 are input rows 17-59;
    output rows 44-46 are input rows 61-63; output rows 47-49 are input
    rows 65-67.  The result is a float64 array of shape (50, 2).
    '''
    kept = np.concatenate((lm_68[8:9], lm_68[17:60], lm_68[61:64], lm_68[65:68]))
    lm_50 = np.zeros((50, 2))
    lm_50[:] = kept
    return lm_50
def lm68_subset(lm_68, NUM_KPTS_TO_USE):
    '''
    Pick a 6- or 50-point subset of a (68, 2) landmark set.

    NUM_KPTS_TO_USE == 6  -> float32 rows [36, 39, 42, 45, 31, 35]
    NUM_KPTS_TO_USE == 50 -> result of lm68_to_50(lm_68)

    Raises ValueError for any other count.  (This used to print a message
    and call exit(0), terminating the whole process with a success status.)
    '''
    if NUM_KPTS_TO_USE == 6:
        lm_68 = np.array(lm_68, dtype=np.float32)
        return lm_68[[36, 39, 42, 45, 31, 35], :]
    if NUM_KPTS_TO_USE == 50:
        return lm68_to_50(lm_68)
    raise ValueError(
        'NUM_KPTS_TO_USE={!r} is not supported yet (use 6 or 50)'.format(NUM_KPTS_TO_USE))
def lm50_subset(lm_50, NUM_KPTS_TO_USE):
    '''
    Pick a 6- or 50-point subset of a (50, 2) landmark set.

    NUM_KPTS_TO_USE == 6  -> rows [20, 23, 26, 29, 15, 19]
    NUM_KPTS_TO_USE == 50 -> a copy of the full set

    The input array is never modified.  Raises ValueError for any other
    count.  (This used to print a message and call exit(0), terminating the
    whole process with a success status.)
    '''
    if NUM_KPTS_TO_USE == 6:
        # Fancy indexing already yields a new array.
        return lm_50[[20, 23, 26, 29, 15, 19], :]
    if NUM_KPTS_TO_USE == 50:
        return lm_50.copy()
    raise ValueError(
        'NUM_KPTS_TO_USE={!r} is not supported yet (use 6 or 50)'.format(NUM_KPTS_TO_USE))
def get_face_center(landmarks_3d):
    '''
    Face centre of six 3D landmarks given column-wise.

    landmarks_3d: (3, 6) -- columns 0-3 are averaged as the eye centre,
        columns 4-5 as the nose centre
    -->
    face_center: (3, 1), the midpoint of those two centres
    '''
    eye_mean = np.mean(landmarks_3d[:, :4], axis=1).reshape((3, 1))
    nose_mean = np.mean(landmarks_3d[:, 4:6], axis=1).reshape((3, 1))
    both = np.concatenate((eye_mean, nose_mean), axis=1)
    return np.mean(both, axis=1).reshape((3, 1))
def compute_R(lm6, dataname):
    '''
    Build the normalization rotation matrix from six 3D landmarks.

    lm6: (6, 3) landmarks in the OpenCV camera coordinate system; rows 0-1
        and 2-3 are averaged into the two eye centres
    dataname: 'mpii' or 'xgaze' -- selects how the face centre is computed
        for mpii: the 6 landmarks are 4 eye + 2 mouth, centre = mean of all
        for xgaze: the 6 landmarks are 4 eye + 2 nose, centre = midpoint of
            the eye midpoint and the nose centre

    Returns the (3, 3) matrix whose rows are the right / down / forward axes.
    Raises ValueError for any other dataname (previously this surfaced as an
    UnboundLocalError further down).
    '''
    left_center = np.mean(lm6[2:4, :], axis=0)
    right_center = np.mean(lm6[:2, :], axis=0)
    if dataname == 'mpii':
        face_center = np.mean(lm6, axis=0)
    elif dataname == 'xgaze':
        nose_center = np.mean(lm6[[4, 5], :], axis=0)
        face_center = ((left_center + right_center)/2 + nose_center) / 2
    else:
        raise ValueError(
            "unknown dataname {!r}; expected 'mpii' or 'xgaze'".format(dataname))

    distance = np.linalg.norm(face_center)
    # Head x axis: from the centre of rows 0-1 towards the centre of rows 2-3.
    hRx = left_center - right_center
    hRx = hRx / np.linalg.norm(hRx)
    forward = (face_center/distance).reshape(3)
    down = np.cross(forward, hRx)
    down = down / np.linalg.norm(down)
    right = np.cross(down, forward)
    right = right / np.linalg.norm(right)
    return np.c_[right, down, forward].T
def rotation_matrix(x, y, z):
    '''
    Compose elementary rotations about the x, y and z axes.

    x, y, z: rotation angles about the respective axis, in radians
    Returns Rz @ Ry @ Rx, i.e. the x rotation is applied first.
    '''
    cos_x, sin_x = np.cos(x), np.sin(x)
    cos_y, sin_y = np.cos(y), np.sin(y)
    cos_z, sin_z = np.cos(z), np.sin(z)
    about_x = np.array([[1, 0, 0],
                        [0, cos_x, -sin_x],
                        [0, sin_x, cos_x]])
    about_y = np.array([[cos_y, 0, sin_y],
                        [0, 1, 0],
                        [-sin_y, 0, cos_y]])
    about_z = np.array([[cos_z, -sin_z, 0],
                        [sin_z, cos_z, 0],
                        [0, 0, 1]])
    return about_z@about_y@about_x
def get_rotation(from_pose, target_pose):
    """Return the rotation that takes ``from_pose`` to ``target_pose``.

    Each pose is a 2-element sequence turned into a matrix with
    ``rotation_matrix(-pose[0], pose[1], 0)``; the result is
    ``R_target @ inv(R_from)``.
    """
    source = rotation_matrix(-from_pose[0], from_pose[1], 0)
    target = rotation_matrix(-target_pose[0], target_pose[1], 0)
    return target@np.linalg.inv(source)
def hR_2_hr(hR):
    """Extract a 2-element angle vector from the third column of ``hR``.

    Element 0 is ``arcsin(hR[1, 2])`` and element 1 is
    ``arctan2(hR[0, 2], hR[2, 2])``.
    """
    first = np.arcsin(hR[1, 2])
    second = np.arctan2(hR[0, 2], hR[2, 2])
    return np.array([first, second])
def hr_2_hR(hr):
    """Build the rotation matrix ``rotation_matrix(-hr[0], hr[1], 0)``."""
    angle_x, angle_y = -hr[0], hr[1]
    return rotation_matrix(angle_x, angle_y, 0)
if __name__ == '__main__':
    # Small demo: rotate two head poses by the rotation that maps the first
    # pose onto a target pose, and print the intermediate matrices.
    def to_hR(hr_norm):
        return rotation_matrix(-hr_norm[0], hr_norm[1], 0)

    hr1 = np.array([0.15, 0.2])
    hr2 = np.array([0.10, 0.2])
    hr_t = np.array([-0.1, 0.3])

    hR1, hR2 = to_hR(hr1), to_hR(hr2)
    print('hR1: ', hR1)
    print('hR2: ', hR2)

    R1t = get_rotation(hr1, hr_t)

    hR1_ = np.dot(R1t, hR1)
    print('rotated hR_: ', hR1_)
    # Read the two pose angles back out of the rotated matrix.
    hr1_ = np.array([
        np.arcsin(hR1_[1, 2]),
        np.arctan2(hR1_[0, 2], hR1_[2, 2]),
    ])
    print('rotated hr1_: ', hr1_)
    print('hR t: ', to_hR(hr_t))

    hR2_ = np.dot(R1t, hR2)
    print('rotated hR2_: ', hR2_)

gazelib/utils/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@


1	+
2	+
3	+ from .h5_utils import add, to_h5
4	+

gazelib/utils/color_text.py ADDED Viewed

	@@ -0,0 +1,85 @@

class ColorText:
    """Wrap text in ANSI escape sequences so terminals render it in colour."""

    # ANSI foreground colour codes, plus the sequence that resets attributes.
    colors = {
        'black': '\033[30m',
        'red': '\033[31m',
        'green': '\033[32m',
        'yellow': '\033[33m',
        'blue': '\033[34m',
        'magenta': '\033[35m',
        'cyan': '\033[36m',
        'white': '\033[37m',
        'reset': '\033[0m'
    }

    @classmethod
    def colorize(cls, text, color):
        """Return ``text`` prefixed with the code for ``color`` and followed by a reset.

        Raises ``KeyError`` when ``color`` is not a key of ``colors``.
        """
        prefix = cls.colors[color]
        suffix = cls.colors["reset"]
        return '{}{}{}'.format(prefix, text, suffix)

    @classmethod
    def black(cls, text):
        """Return ``text`` coloured black."""
        return cls.colorize(text, 'black')

    @classmethod
    def red(cls, text):
        """Return ``text`` coloured red."""
        return cls.colorize(text, 'red')

    @classmethod
    def green(cls, text):
        """Return ``text`` coloured green."""
        return cls.colorize(text, 'green')

    @classmethod
    def yellow(cls, text):
        """Return ``text`` coloured yellow."""
        return cls.colorize(text, 'yellow')

    @classmethod
    def blue(cls, text):
        """Return ``text`` coloured blue."""
        return cls.colorize(text, 'blue')

    @classmethod
    def magenta(cls, text):
        """Return ``text`` coloured magenta."""
        return cls.colorize(text, 'magenta')

    @classmethod
    def cyan(cls, text):
        """Return ``text`` coloured cyan."""
        return cls.colorize(text, 'cyan')

    @classmethod
    def white(cls, text):
        """Return ``text`` coloured white."""
        return cls.colorize(text, 'white')
def print_green(*args, **kwargs):
    """Print the positional arguments in green.

    The arguments are converted with ``str`` and joined with ``sep`` (one
    space by default, as with ``print``); the joined text is wrapped by
    ``ColorText.green``.  The remaining keyword arguments (``end``, ``file``,
    ``flush``) are forwarded to ``print`` instead of being silently dropped.
    """
    sep = kwargs.pop('sep', None)
    if sep is None:
        sep = ' '
    print(ColorText.green(sep.join(str(arg) for arg in args)), **kwargs)
def print_yellow(*args, **kwargs):
    """Print the positional arguments in yellow.

    The arguments are converted with ``str`` and joined with ``sep`` (one
    space by default, as with ``print``); the joined text is wrapped by
    ``ColorText.yellow``.  The remaining keyword arguments (``end``,
    ``file``, ``flush``) are forwarded to ``print`` instead of being
    silently dropped.
    """
    sep = kwargs.pop('sep', None)
    if sep is None:
        sep = ' '
    print(ColorText.yellow(sep.join(str(arg) for arg in args)), **kwargs)
def print_magenta(*args, **kwargs):
    """Print the positional arguments in magenta.

    The arguments are converted with ``str`` and joined with ``sep`` (one
    space by default, as with ``print``); the joined text is wrapped by
    ``ColorText.magenta``.  The remaining keyword arguments (``end``,
    ``file``, ``flush``) are forwarded to ``print`` instead of being
    silently dropped.
    """
    sep = kwargs.pop('sep', None)
    if sep is None:
        sep = ' '
    print(ColorText.magenta(sep.join(str(arg) for arg in args)), **kwargs)
def print_cyan(*args, **kwargs):
    """Print the positional arguments in cyan.

    The arguments are converted with ``str`` and joined with ``sep`` (one
    space by default, as with ``print``); the joined text is wrapped by
    ``ColorText.cyan``.  The remaining keyword arguments (``end``, ``file``,
    ``flush``) are forwarded to ``print`` instead of being silently dropped.
    """
    sep = kwargs.pop('sep', None)
    if sep is None:
        sep = ' '
    print(ColorText.cyan(sep.join(str(arg) for arg in args)), **kwargs)
def print_red(*args, **kwargs):
    """Print the positional arguments in red.

    The arguments are converted with ``str`` and joined with ``sep`` (one
    space by default, as with ``print``); the joined text is wrapped by
    ``ColorText.red``.  The remaining keyword arguments (``end``, ``file``,
    ``flush``) are forwarded to ``print`` instead of being silently dropped.
    """
    sep = kwargs.pop('sep', None)
    if sep is None:
        sep = ' '
    print(ColorText.red(sep.join(str(arg) for arg in args)), **kwargs)
if __name__ == '__main__':
    # Print every colour name in its own colour as a quick visual check.
    for name in ('red', 'green', 'yellow', 'blue',
                 'magenta', 'cyan', 'white', 'black'):
        print(getattr(ColorText, name)(name))

gazelib/utils/h5_utils.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import os
+import numpy as np
+import imageio
+import cv2
+import h5py
+import math
def add(to_write, key, value):  # noqa
    """Append ``value`` to the list stored under ``key``, creating the list on first use."""
    to_write.setdefault(key, []).append(value)
def _create_growable_dataset(h5_file, key, values):
    """Create dataset ``key`` chunked one row at a time and resizable along axis 0."""
    row_shape = list(values.shape[1:])
    h5_file.create_dataset(
        key, data=values,
        chunks=tuple([1] + row_shape),
        compression='lzf',
        maxshape=tuple([None] + row_shape),
    )


def to_h5(to_write, output_path):
    """Write (or append) every entry of ``to_write`` to an HDF5 file.

    Args:
        to_write: Mapping from dataset name to a sequence of rows.  It is no
            longer modified: the values used to be replaced by numpy arrays
            in place, which broke any later ``add`` on the same dict because
            arrays have no ``append``.
        output_path: HDF5 file path.  A new file is created when it does not
            exist; otherwise new datasets are added and existing ones are
            extended along axis 0.
    """
    arrays = {key: np.asarray(values) for key, values in to_write.items()}

    if not os.path.isfile(output_path):
        with h5py.File(output_path, 'w') as f:
            for key, values in arrays.items():
                print("values.shape: ", values.shape)
                _create_growable_dataset(f, key, values)
                print("chunks: ", f[key].chunks)
        return

    with h5py.File(output_path, 'a') as f:
        for key, values in arrays.items():
            if key not in f:
                print('write it to f {}'.format(output_path))
                _create_growable_dataset(f, key, values)
                continue
            num_new = values.shape[0]
            if num_new == 0:
                # Nothing to append; `data[-0:]` would address the whole
                # dataset rather than an empty tail.
                continue
            data = f[key]
            data.resize(data.shape[0] + num_new, axis=0)
            data[-num_new:] = values

models/hybrid_tr.py ADDED Viewed

	@@ -0,0 +1,570 @@

+import os
+import sys
+import torch
+import torch.nn as nn
+import torchvision.models as models
+import numpy as np
+import math
+import copy
+# from modules.resnet_v1 import resnet50
+import torch.utils.model_zoo as model_zoo
+from torch.utils.model_zoo import load_url as load_state_dict_from_url
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
+           'wide_resnet50_2', 'wide_resnet101_2']
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    """Build a ``ResFeature`` backbone and optionally load published weights.

    Args:
        arch: Key into ``model_urls`` naming the checkpoint to download.
        block: Residual block class passed to ``ResFeature``.
        layers: Number of blocks per stage.
        pretrained: When true, download the checkpoint for ``arch`` and load
            it non-strictly (missing/unexpected keys are tolerated).
        progress: Whether the download shows a progress bar.
        **kwargs: Forwarded to ``ResFeature``.
    """
    model = ResFeature(block, layers, **kwargs)
    if not pretrained:
        return model
    weights = load_state_dict_from_url(model_urls[arch], progress=progress)
    model.load_state_dict(weights, strict=False)
    return model
def resnet18(pretrained=False, progress=True, **kwargs):
    r"""Build the ResNet-18 variant of the backbone created by ``_resnet``.

    Architecture from `"Deep Residual Learning for Image Recognition"
    <https://arxiv.org/pdf/1512.03385.pdf>`_ (``BasicBlock``, stages 2-2-2-2).

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    stage_depths = [2, 2, 2, 2]
    return _resnet('resnet18', BasicBlock, stage_depths,
                   pretrained, progress, **kwargs)
def resnet34(pretrained=False, progress=True, **kwargs):
    r"""Build the ResNet-34 variant of the backbone created by ``_resnet``.

    Architecture from `"Deep Residual Learning for Image Recognition"
    <https://arxiv.org/pdf/1512.03385.pdf>`_ (``BasicBlock``, stages 3-4-6-3).

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnet34', BasicBlock, stage_depths,
                   pretrained, progress, **kwargs)
def resnet50(pretrained=False, progress=True, **kwargs):
    r"""Build the ResNet-50 variant of the backbone created by ``_resnet``.

    Architecture from `"Deep Residual Learning for Image Recognition"
    <https://arxiv.org/pdf/1512.03385.pdf>`_ (``Bottleneck``, stages 3-4-6-3).

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnet50', Bottleneck, stage_depths,
                   pretrained, progress, **kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
    r"""Build the ResNet-101 variant of the backbone created by ``_resnet``.

    Architecture from `"Deep Residual Learning for Image Recognition"
    <https://arxiv.org/pdf/1512.03385.pdf>`_ (``Bottleneck``, stages 3-4-23-3).

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    stage_depths = [3, 4, 23, 3]
    return _resnet('resnet101', Bottleneck, stage_depths,
                   pretrained, progress, **kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
    r"""Build the ResNet-152 variant of the backbone created by ``_resnet``.

    Architecture from `"Deep Residual Learning for Image Recognition"
    <https://arxiv.org/pdf/1512.03385.pdf>`_ (``Bottleneck``, stages 3-8-36-3).

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    stage_depths = [3, 8, 36, 3]
    return _resnet('resnet152', Bottleneck, stage_depths,
                   pretrained, progress, **kwargs)
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
    r"""Build the ResNeXt-50 32x4d variant of the backbone created by ``_resnet``.

    Architecture from `"Aggregated Residual Transformation for Deep Neural
    Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_.  ``groups`` and
    ``width_per_group`` are forced to 32 and 4, overriding any caller value.

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs.update(groups=32, width_per_group=4)
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnext50_32x4d', Bottleneck, stage_depths,
                   pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
    r"""Build the ResNeXt-101 32x8d variant of the backbone created by ``_resnet``.

    Architecture from `"Aggregated Residual Transformation for Deep Neural
    Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_.  ``groups`` and
    ``width_per_group`` are forced to 32 and 8, overriding any caller value.

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs.update(groups=32, width_per_group=8)
    stage_depths = [3, 4, 23, 3]
    return _resnet('resnext101_32x8d', Bottleneck, stage_depths,
                   pretrained, progress, **kwargs)
def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
    r"""Build the Wide ResNet-50-2 variant of the backbone created by ``_resnet``.

    Architecture from `"Wide Residual Networks"
    <https://arxiv.org/pdf/1605.07146.pdf>`_.  It matches ResNet-50 except
    that the bottleneck channel count is doubled in every block, while the
    outer 1x1 convolutions keep their width: the last block has
    2048-1024-2048 channels instead of 2048-512-2048.  ``width_per_group``
    is forced to 128, overriding any caller value.

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs.update(width_per_group=64 * 2)
    stage_depths = [3, 4, 6, 3]
    return _resnet('wide_resnet50_2', Bottleneck, stage_depths,
                   pretrained, progress, **kwargs)
def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
    r"""Build the Wide ResNet-101-2 variant of the backbone created by ``_resnet``.

    Architecture from `"Wide Residual Networks"
    <https://arxiv.org/pdf/1605.07146.pdf>`_.  It matches ResNet-101 except
    that the bottleneck channel count is doubled in every block, while the
    outer 1x1 convolutions keep their width (e.g. 2048-1024-2048 instead of
    2048-512-2048 in the last block).  ``width_per_group`` is forced to 128,
    overriding any caller value.

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs.update(width_per_group=64 * 2)
    stage_depths = [3, 4, 23, 3]
    return _resnet('wide_resnet101_2', Bottleneck, stage_depths,
                   pretrained, progress, **kwargs)
# Checkpoint URL for every architecture name accepted by `_resnet`; all
# files live under the PyTorch model download host.
model_urls = {
    arch: 'https://download.pytorch.org/models/' + filename
    for arch, filename in (
        ('resnet18', 'resnet18-5c106cde.pth'),
        ('resnet34', 'resnet34-333f7ec4.pth'),
        ('resnet50', 'resnet50-19c8e357.pth'),
        ('resnet101', 'resnet101-5d3b4d8f.pth'),
        ('resnet152', 'resnet152-b121ed2d.pth'),
        ('resnext50_32x4d', 'resnext50_32x4d-7cdf4587.pth'),
        ('resnext101_32x8d', 'resnext101_32x8d-8ba56ff5.pth'),
        ('wide_resnet50_2', 'wide_resnet50_2-95faca4d.pth'),
        ('wide_resnet101_2', 'wide_resnet101_2-32ee1156.pth'),
    )
}
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+    """3x3 convolution with padding"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=dilation, groups=groups, bias=False, dilation=dilation)
+def conv1x1(in_planes, out_planes, stride=1):
+    """1x1 convolution"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+class BasicBlock(nn.Module):
+    expansion = 1
+    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+                 base_width=64, dilation=1, norm_layer=None):
+        super(BasicBlock, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        if groups != 1 or base_width != 64:
+            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
+        if dilation > 1:
+            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = norm_layer(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = norm_layer(planes)
+        self.downsample = downsample
+        self.stride = stride
+    def forward(self, x):
+        identity = x
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        if self.downsample is not None:
+            identity = self.downsample(x)
+        out += identity
+        out = self.relu(out)
+        return out
+class Bottleneck(nn.Module):
+    expansion = 4
+    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+                 base_width=64, dilation=1, norm_layer=None):
+        super(Bottleneck, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        width = int(planes * (base_width / 64.)) * groups
+        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
+        self.conv1 = conv1x1(inplanes, width)
+        self.bn1 = norm_layer(width)
+        self.conv2 = conv3x3(width, width, stride, groups, dilation)
+        self.bn2 = norm_layer(width)
+        self.conv3 = conv1x1(width, planes * self.expansion)
+        self.bn3 = norm_layer(planes * self.expansion)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+    def forward(self, x):
+        identity = x
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+        out = self.conv3(out)
+        out = self.bn3(out)
+        if self.downsample is not None:
+            identity = self.downsample(x)
+        out += identity
+        out = self.relu(out)
+        return out
+class ResFeature(nn.Module):
+    """ResNet convolutional trunk (stem + four residual stages) without a pooling/classifier head.
+    ``forward`` returns the output of ``layer4`` directly.
+    Args:
+        block: residual block class; must expose an ``expansion`` attribute
+        layers: number of blocks in each of the four stages
+        num_classes: accepted but not used anywhere in this class
+        zero_init_residual: if True, zero the weight of the last norm layer of every
+            ``Bottleneck`` / ``BasicBlock``
+        groups, width_per_group: forwarded to every block
+        replace_stride_with_dilation: None or 3 booleans, one each for layer2..layer4
+        norm_layer: normalisation layer factory, defaults to ``nn.BatchNorm2d``
+    Raises:
+        ValueError: if ``replace_stride_with_dilation`` does not have 3 elements
+    """
+    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
+                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
+                 norm_layer=None):
+        super(ResFeature, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        self._norm_layer = norm_layer
+        # Running state consumed and updated by _make_layer as stages are stacked.
+        self.inplanes = 64
+        self.dilation = 1
+        if replace_stride_with_dilation is None:
+            # each element in the tuple indicates if we should replace
+            # the 2x2 stride with a dilated convolution instead
+            replace_stride_with_dilation = [False, False, False]
+        if len(replace_stride_with_dilation) != 3:
+            raise ValueError("replace_stride_with_dilation should be None "
+                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
+        self.groups = groups
+        self.base_width = width_per_group
+        # Stem: 7x7 stride-2 convolution on a 3-channel input, then 3x3 stride-2 max-pool.
+        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = norm_layer(self.inplanes)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        # Four residual stages; stages 2-4 stride by 2 unless dilation is requested instead.
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
+                                       dilate=replace_stride_with_dilation[0])
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
+                                       dilate=replace_stride_with_dilation[1])
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
+                                       dilate=replace_stride_with_dilation[2])
+        # Kaiming init for every convolution; norm layers start as identity (weight 1, bias 0).
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+        # Zero-initialize the last BN in each residual branch,
+        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+        if zero_init_residual:
+            for m in self.modules():
+                if isinstance(m, Bottleneck):
+                    nn.init.constant_(m.bn3.weight, 0)
+                elif isinstance(m, BasicBlock):
+                    nn.init.constant_(m.bn2.weight, 0)
+    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+        """Build one stage of ``blocks`` residual blocks and advance ``self.inplanes``.
+        With ``dilate`` set, the stage's stride is folded into ``self.dilation`` and the
+        stage itself runs at stride 1.
+        """
+        norm_layer = self._norm_layer
+        downsample = None
+        previous_dilation = self.dilation
+        if dilate:
+            self.dilation *= stride
+            stride = 1
+        # A projection shortcut is needed whenever the first block changes resolution or width.
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                norm_layer(planes * block.expansion),
+            )
+        layers = []
+        # Only the first block gets the stride/downsample; it uses the dilation in effect
+        # before this stage, the remaining blocks use the updated one.
+        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
+                            self.base_width, previous_dilation, norm_layer))
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            layers.append(block(self.inplanes, planes, groups=self.groups,
+                                base_width=self.base_width, dilation=self.dilation,
+                                norm_layer=norm_layer))
+        return nn.Sequential(*layers)
+    def forward(self, x):
+        """Run stem and all four stages; returns the ``layer4`` feature map (no pooling, no fc)."""
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+class ResGazeEs(nn.Module):
+    def __init__(self, ):
+        super(ResGazeEs, self).__init__()
+        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+        self.fc = nn.Linear(2048, 2)
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+    def forward(self, x):
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)
+        x = self.fc(x)
+        return x
+class CNN_Model(nn.Module):
+    def __init__(self):
+        super(CNN_Model, self).__init__()
+        self.feature =  resnet50(pretrained=True)
+        # self.feature.load_state_dict(torch.load(pretrained_url), strict=False )
+        self.gazeEs = ResGazeEs()
+        # self.gazeEs.load_state_dict(torch.load(pretrained_url), strict=False )
+    def forward(self, x_in):
+        features = self.feature(x_in)
+        gaze = self.gazeEs(features)
+        return gaze, features
+class TransformerEncoder(nn.Module):
+    def __init__(self, encoder_layer, num_layers, norm=None):
+        super().__init__()
+        self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for i in range(num_layers)])
+        self.num_layers = num_layers
+        self.norm = norm
+    def forward(self, src, pos):
+        output = src
+        for layer in self.layers:
+            output = layer(output, pos)
+        if self.norm is not None:
+            output = self.norm(output)
+        return output
+class TransformerEncoderLayer(nn.Module):
+    def __init__(self, d_model, nhead, dim_feedforward=512, dropout=0.1):
+        super().__init__()
+        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
+        # Implementation of Feedforward model
+        self.linear1 = nn.Linear(d_model, dim_feedforward)
+        self.dropout = nn.Dropout(dropout)
+        self.linear2 = nn.Linear(dim_feedforward, d_model)
+        self.norm1 = nn.LayerNorm(d_model)
+        self.norm2 = nn.LayerNorm(d_model)
+        self.dropout1 = nn.Dropout(dropout)
+        self.dropout2 = nn.Dropout(dropout)
+        self.activation = nn.ReLU(inplace=True)
+    def pos_embed(self, src, pos):
+        batch_pos = pos.unsqueeze(1).repeat(1, src.size(1), 1)
+        return src + batch_pos
+    def forward(self, src, pos):
+        # src_mask: Optional[Tensor] = None,
+        # src_key_padding_mask: Optional[Tensor] = None):
+        # pos: Optional[Tensor] = None):
+        q = k = self.pos_embed(src, pos)
+        src2 = self.self_attn(q, k, value=src)[0]
+        src = src + self.dropout1(src2)
+        src = self.norm1(src)
+        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
+        src = src + self.dropout2(src2)
+        src = self.norm2(src)
+        return src
+class FeatureTransformer(nn.Module):
+    '''
+    This is the end head which is included in the resnet18 (in official code)
+    To avoid ambiguity, extract this part out of resnet18
+    '''
+    def __init__(self, in_channels=512, maps=32):
+        super(FeatureTransformer, self).__init__()
+        self.conv = nn.Sequential(
+            nn.Conv2d(in_channels, maps, 1),
+            nn.BatchNorm2d(maps),
+            nn.ReLU(inplace=True)
+        )
+    def forward(self, x):
+        x = self.conv(x)
+        return x
+class HybridTR18(nn.Module):
+    def __init__(self):
+        super().__init__()
+        maps = 32
+        nhead = 8
+        dim_feature = 7*7
+        dim_feedforward=512
+        dropout = 0.1
+        num_layers=6
+        self.base_model = resnet18(pretrained=True) #False, maps=maps)
+        self.base_model_head = FeatureTransformer(in_channels=dim_feedforward, maps=maps)
+        # d_model: dim of Q, K, V
+        # nhead: seq num
+        # dim_feedforward: dim of hidden linear layers
+        # dropout: prob
+        encoder_layer = TransformerEncoderLayer(
+                  maps,
+                  nhead,
+                  dim_feedforward,
+                  dropout)
+        encoder_norm = nn.LayerNorm(maps)
+        # num_encoder_layer: deeps of layers
+        self.encoder = TransformerEncoder(encoder_layer, num_layers, encoder_norm)
+        self.cls_token = nn.Parameter(torch.randn(1, 1, maps))
+        self.pos_embedding = nn.Embedding(dim_feature+1, maps)
+        self.feed = nn.Linear(maps, 2)
+    def forward(self, x_in, normalize_z=False):
+        output_dict = {}
+        # feature = self.base_model(x_in["face"])
+        feature = self.base_model(x_in)
+        feature = self.base_model_head(feature)
+        batch_size = feature.size(0)
+        feature = feature.flatten(2)
+        feature = feature.permute(2, 0, 1)
+        cls = self.cls_token.repeat( (1, batch_size, 1))
+        feature = torch.cat([cls, feature], 0)
+        position = torch.from_numpy(np.arange(0, 50)).cuda()
+        pos_feature = self.pos_embedding(position)
+        # feature is [HW, batch, channel]
+        feature = self.encoder(feature, pos_feature)
+        feature = feature.permute(1, 2, 0)
+        feature = feature[:,:,0]
+        pred_gaze = self.feed(feature)
+        output_dict['pred_gaze'] = pred_gaze
+        return output_dict
+class HybridTR50(nn.Module):
+    def __init__(self):
+        super().__init__()
+        maps = 32
+        nhead = 8
+        dim_feature = 7*7
+        dim_feedforward=2048
+        dropout = 0.1
+        num_layers=6
+        self.base_model = resnet50(pretrained=True) #False, maps=maps)
+        self.base_model_head = FeatureTransformer(in_channels=dim_feedforward,maps=maps)
+        # d_model: dim of Q, K, V
+        # nhead: seq num
+        # dim_feedforward: dim of hidden linear layers
+        # dropout: prob
+        encoder_layer = TransformerEncoderLayer(
+                  maps,
+                  nhead,
+                  dim_feedforward,
+                  dropout)
+        encoder_norm = nn.LayerNorm(maps)
+        # num_encoder_layer: deeps of layers
+        self.encoder = TransformerEncoder(encoder_layer, num_layers, encoder_norm)
+        self.cls_token = nn.Parameter(torch.randn(1, 1, maps))
+        self.pos_embedding = nn.Embedding(dim_feature+1, maps)
+        self.feed = nn.Linear(maps, 2)
+    def forward(self, x_in, normalize_z=False):
+        output_dict = {}
+        feature = self.base_model(x_in) ##(batch, 2048, 7, 7)
+        feature = self.base_model_head(feature) ##  (batch, 32, 7, 7)
+        batch_size = feature.size(0) ## batch size
+        feature = feature.flatten(2) ## (batch, 32, 49)
+        feature = feature.permute(2, 0, 1) ## (49, batch, 32)
+        cls = self.cls_token.repeat( (1, batch_size, 1)) ## (1, batch, 32)
+        feature = torch.cat([cls, feature], 0) ## (50, batch, 32)
+        position = torch.from_numpy(np.arange(0, 50)).cuda() ## (50,)
+        pos_feature = self.pos_embedding(position) ## (50, 32)
+        # feature is [HW, batch, channel]
+        feature = self.encoder(feature, pos_feature) ## (50, batch, 32)
+        feature = feature.permute(1, 2, 0) ## (batch, 32, 50)
+        feature = feature[:,:,0] ## (batch, 32)
+        pred_gaze = self.feed(feature) ## (batch, 2)
+        output_dict['pred_gaze'] = pred_gaze
+        return output_dict

models/resnet.py ADDED Viewed

	@@ -0,0 +1,366 @@

+import torch
+import torch.nn as nn
+from torch.utils.model_zoo import load_url as load_state_dict_from_url
+import torch.nn.functional as F
+# Checkpoint download URLs on download.pytorch.org, keyed by the ``arch`` string
+# that ``_resnet`` uses for the lookup when ``pretrained`` is set.
+model_urls = {
+	'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+	'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+	'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+	'resnet152': 'https://download.pytorch.org/models/resnet152-394f9c45.pth'
+}
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+	"""3x3 convolution with padding"""
+	return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+					 padding=dilation, groups=groups, bias=False, dilation=dilation)
+def conv1x1(in_planes, out_planes, stride=1):
+	"""1x1 convolution"""
+	return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+class BasicBlock(nn.Module):
+	expansion = 1
+	def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+				 base_width=64, dilation=1, norm_layer=None):
+		super(BasicBlock, self).__init__()
+		if norm_layer is None:
+			norm_layer = nn.BatchNorm2d
+		if groups != 1 or base_width != 64:
+			raise ValueError('BasicBlock only supports groups=1 and base_width=64')
+		if dilation > 1:
+			raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+		# Both self.conv1 and self.downsample layers downsample the input when stride != 1
+		self.conv1 = conv3x3(inplanes, planes, stride)
+		self.bn1 = norm_layer(planes)
+		self.relu = nn.ReLU(inplace=True)
+		self.conv2 = conv3x3(planes, planes)
+		self.bn2 = norm_layer(planes)
+		self.downsample = downsample
+		self.stride = stride
+	def forward(self, x):
+		identity = x
+		out = self.conv1(x)
+		out = self.bn1(out)
+		out = self.relu(out)
+		out = self.conv2(out)
+		out = self.bn2(out)
+		if self.downsample is not None:
+			identity = self.downsample(x)
+		out += identity
+		out = self.relu(out)
+		return out
+class Bottleneck(nn.Module):
+	expansion = 4
+	def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+				 base_width=64, dilation=1, norm_layer=None):
+		super(Bottleneck, self).__init__()
+		if norm_layer is None:
+			norm_layer = nn.BatchNorm2d
+		width = int(planes * (base_width / 64.)) * groups
+		# Both self.conv2 and self.downsample layers downsample the input when stride != 1
+		self.conv1 = conv1x1(inplanes, width)
+		self.bn1 = norm_layer(width)
+		self.conv2 = conv3x3(width, width, stride, groups, dilation)
+		self.bn2 = norm_layer(width)
+		self.conv3 = conv1x1(width, planes * self.expansion)
+		self.bn3 = norm_layer(planes * self.expansion)
+		self.relu = nn.ReLU(inplace=True)
+		self.downsample = downsample
+		self.stride = stride
+	def forward(self, x):
+		identity = x
+		out = self.conv1(x)
+		out = self.bn1(out)
+		out = self.relu(out)
+		out = self.conv2(out)
+		out = self.bn2(out)
+		out = self.relu(out)
+		out = self.conv3(out)
+		out = self.bn3(out)
+		if self.downsample is not None:
+			identity = self.downsample(x)
+		out += identity
+		out = self.relu(out)
+		return out
+class DeconvBasicBlock(nn.Module):
+	def __init__(self, in_planes, stride=1, norm_layer=None):
+		super(DeconvBasicBlock, self).__init__()
+		if norm_layer is None:
+			norm_layer = nn.BatchNorm2d
+		planes = int(in_planes/stride)
+		self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=1, padding=1, bias=False)
+		self.bn2 = norm_layer(in_planes)
+		self.bn1 = norm_layer(planes)
+		if stride == 1:
+			self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+			self.bn1 = norm_layer(planes)
+			self.shortcut = nn.Sequential()
+		else:
+			self.conv1 = nn.ConvTranspose2d(in_planes, planes, kernel_size=3, stride=stride, bias=False, padding=1, output_padding=1)
+			self.bn1 = norm_layer(planes)
+			self.shortcut = nn.Sequential(
+				nn.ConvTranspose2d(in_planes, planes, kernel_size=3, stride=stride, bias=False, padding=1, output_padding=1),
+				norm_layer(planes)
+			)
+	def forward(self, x):
+		out = torch.relu(self.bn2(self.conv2(x)))
+		out = self.bn1(self.conv1(out))
+		out += self.shortcut(x)
+		out = torch.relu(out)
+		return out
+class DeconvBottleneck(nn.Module):
+	def __init__(self, in_channels, out_channels, expansion=2, stride=1, upsample=None, norm_layer=None):
+		super(DeconvBottleneck, self).__init__()
+		if norm_layer is None:
+			norm_layer = nn.BatchNorm2d
+		self.expansion = expansion
+		self.conv1 = nn.Conv2d(in_channels, out_channels,
+							   kernel_size=1, bias=False)
+		self.bn1 = norm_layer(out_channels)
+		if stride == 1:
+			self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
+								   stride=stride, bias=False, padding=1)
+		else:
+			self.conv2 = nn.ConvTranspose2d(out_channels, out_channels,
+											kernel_size=3,
+											stride=stride, bias=False,
+											padding=1,
+											output_padding=1)
+		self.bn2 = norm_layer(out_channels)
+		self.conv3 = nn.Conv2d(out_channels, out_channels * self.expansion,
+							   kernel_size=1, bias=False)
+		self.bn3 = norm_layer(out_channels * self.expansion)
+		self.relu = nn.ReLU()
+		self.upsample = upsample
+	def forward(self, x):
+		shortcut = x
+		out = self.conv1(x)
+		out = self.bn1(out)
+		out = self.relu(out)
+		out = self.conv2(out)
+		out = self.bn2(out)
+		out = self.relu(out)
+		out = self.conv3(out)
+		out = self.bn3(out)
+		out = self.relu(out)
+		if self.upsample is not None:
+			shortcut = self.upsample(x)
+		out += shortcut
+		out = self.relu(out)
+		return out
+class ResNet(nn.Module):
+	"""ResNet used as a feature extractor: ``forward`` stops after ``layer4``.
+	``avgpool`` and ``fc`` are constructed but never called by ``forward``
+	(presumably kept so that checkpoints containing ``fc.*`` keys load without
+	key mismatches - confirm against the checkpoints in use).
+	Args:
+		block: residual block class; must expose an ``expansion`` attribute
+		layers: number of blocks in each of the four stages
+		num_classes: output size of the (unused in ``forward``) ``fc`` layer
+		zero_init_residual: if True, zero the weight of the last norm layer of every
+			``Bottleneck`` / ``BasicBlock``
+		groups, width_per_group: forwarded to every block
+		replace_stride_with_dilation: None or 3 booleans, one each for layer2..layer4
+		norm_layer: normalisation layer factory, defaults to ``nn.BatchNorm2d``
+	Raises:
+		ValueError: if ``replace_stride_with_dilation`` does not have 3 elements
+	"""
+	def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
+				 groups=1, width_per_group=64, replace_stride_with_dilation=None,
+				 norm_layer=None):
+		super(ResNet, self).__init__()
+		if norm_layer is None:
+			norm_layer = nn.BatchNorm2d
+		self._norm_layer = norm_layer
+		# Running state consumed and updated by _make_layer as stages are stacked.
+		self.inplanes = 64
+		self.dilation = 1
+		if replace_stride_with_dilation is None:
+			# each element in the tuple indicates if we should replace
+			# the 2x2 stride with a dilated convolution instead
+			replace_stride_with_dilation = [False, False, False]
+		if len(replace_stride_with_dilation) != 3:
+			raise ValueError("replace_stride_with_dilation should be None "
+							 "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
+		self.groups = groups
+		self.base_width = width_per_group
+		# Stem: 7x7 stride-2 convolution on a 3-channel input, then 3x3 stride-2 max-pool.
+		self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
+							   bias=False)
+		self.bn1 = norm_layer(self.inplanes)
+		self.relu = nn.ReLU(inplace=True)
+		self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+		# Four residual stages; stages 2-4 stride by 2 unless dilation is requested instead.
+		self.layer1 = self._make_layer(block, 64, layers[0])
+		self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
+									   dilate=replace_stride_with_dilation[0])
+		self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
+									   dilate=replace_stride_with_dilation[1])
+		self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
+									   dilate=replace_stride_with_dilation[2])
+		# Classification head: registered as modules, but not used by forward().
+		self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+		self.fc = nn.Linear(512 * block.expansion, num_classes)
+		# Kaiming init for every convolution; norm layers start as identity (weight 1, bias 0).
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+			elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+		# Zero-initialize the last BN in each residual branch,
+		# so that the residual branch starts with zeros, and each residual block behaves like an identity.
+		# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+		if zero_init_residual:
+			for m in self.modules():
+				if isinstance(m, Bottleneck):
+					nn.init.constant_(m.bn3.weight, 0)
+				elif isinstance(m, BasicBlock):
+					nn.init.constant_(m.bn2.weight, 0)
+	def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+		"""Build one stage of ``blocks`` residual blocks and advance ``self.inplanes``.
+		With ``dilate`` set, the stage's stride is folded into ``self.dilation`` and the
+		stage itself runs at stride 1.
+		"""
+		norm_layer = self._norm_layer
+		downsample = None
+		previous_dilation = self.dilation
+		if dilate:
+			self.dilation *= stride
+			stride = 1
+		# A projection shortcut is needed whenever the first block changes resolution or width.
+		if stride != 1 or self.inplanes != planes * block.expansion:
+			downsample = nn.Sequential(
+				conv1x1(self.inplanes, planes * block.expansion, stride),
+				norm_layer(planes * block.expansion),
+			)
+		layers = []
+		# Only the first block gets the stride/downsample; it uses the dilation in effect
+		# before this stage, the remaining blocks use the updated one.
+		layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
+							self.base_width, previous_dilation, norm_layer))
+		self.inplanes = planes * block.expansion
+		for _ in range(1, blocks):
+			layers.append(block(self.inplanes, planes, groups=self.groups,
+								base_width=self.base_width, dilation=self.dilation,
+								norm_layer=norm_layer))
+		return nn.Sequential(*layers)
+	def forward(self, x):
+		"""Run stem and all four stages; returns the ``layer4`` feature map (avgpool/fc are skipped)."""
+		x = self.conv1(x)
+		x = self.bn1(x)
+		x = self.relu(x)
+		x = self.maxpool(x)
+		x = self.layer1(x)
+		x = self.layer2(x)
+		x = self.layer3(x)
+		x = self.layer4(x)
+		return x
+def _resnet(arch, block, layers, pretrained, progress, **kwargs):
+	model = ResNet(block, layers, **kwargs)
+	if pretrained:
+		state_dict = load_state_dict_from_url(model_urls[arch],
+											  progress=progress)
+		model.load_state_dict(state_dict)
+	return model
+def resnet18(pretrained=False, progress=True, **kwargs):
+	r"""ResNet-18 model from
+	`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>'_
+	Args:
+		pretrained (bool): If True, returns a model pre-trained on ImageNet
+		progress (bool): If True, displays a progress bar of the download to stderr
+	"""
+	return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
+				   **kwargs)
+def resnet34(pretrained=False, progress=True, **kwargs):
+	r"""ResNet-34 model from
+	`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>'_
+	Args:
+		pretrained (bool): If True, returns a model pre-trained on ImageNet
+		progress (bool): If True, displays a progress bar of the download to stderr
+	"""
+	return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
+				   **kwargs)
+def resnet50(pretrained=False, progress=True, **kwargs):
+	r"""ResNet-50 model from
+	`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>'_
+	Args:
+		pretrained (bool): If True, returns a model pre-trained on ImageNet
+		progress (bool): If True, displays a progress bar of the download to stderr
+	"""
+	return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
+				   **kwargs)
+def resnet152(pretrained=False, progress=True, **kwargs):
+	r"""ResNet-152 model from
+	`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>'_
+	Args:
+		pretrained (bool): If True, returns a model pre-trained on ImageNet
+		progress (bool): If True, displays a progress bar of the download to stderr
+	"""
+	return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
+				   **kwargs)
+class ResNetGaze(nn.Module):
+	def __init__(self):
+		raise NotImplementedError
+	def forward(self, x_in):
+		output_dict = {}
+		features = self.feature(x_in)
+		z = self.avgpool(features)
+		z = z.view(z.size(0), -1) ## (batch, dim)
+		pred_gaze = self.fc(z)
+		output_dict['pred_gaze'] = pred_gaze
+		return output_dict
+class Res18(ResNetGaze, nn.Module):
+	def __init__(self,  resnet_pretrained=True):
+		nn.Module.__init__(self)
+		self.feature = resnet18(pretrained=resnet_pretrained)
+		self.avgpool = nn.AdaptiveAvgPool2d((1,1))
+		self.fc = nn.Linear(512, 2)
+class Res50(ResNetGaze, nn.Module):
+	def __init__(self, resnet_pretrained=True):
+		nn.Module.__init__(self)
+		self.feature = resnet50(pretrained=resnet_pretrained)
+		self.avgpool = nn.AdaptiveAvgPool2d((1,1))
+		self.fc = nn.Linear(2048, 2)
+class Res152(ResNetGaze, nn.Module):
+	def __init__(self, resnet_pretrained=True):
+		nn.Module.__init__(self)
+		self.feature = resnet152(pretrained=resnet_pretrained)
+		self.avgpool = nn.AdaptiveAvgPool2d((1,1))
+		self.fc = nn.Linear(2048, 2)

models/vit/mae.py ADDED Viewed

	@@ -0,0 +1,429 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+# --------------------------------------------------------
+# References:
+# timm: https://github.com/rwightman/pytorch-image-models/tree/master/timm
+# DeiT: https://github.com/facebookresearch/deit
+# --------------------------------------------------------
+from functools import partial
+import torch
+import torch.nn as nn
+from timm.models.vision_transformer import PatchEmbed, Block
+# from util.pos_embed import get_2d_sincos_pos_embed
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+# --------------------------------------------------------
+# Position embedding utils
+# --------------------------------------------------------
+import numpy as np
+import torch
+# --------------------------------------------------------
+# 2D sine-cosine position embedding
+# References:
+# Transformer: https://github.com/tensorflow/models/blob/master/official/nlp/transformer/model_utils.py
+# MoCo v3: https://github.com/facebookresearch/moco-v3
+# --------------------------------------------------------
+def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
+    """
+    grid_size: int of the grid height and width
+    return:
+    pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
+    """
+    grid_h = np.arange(grid_size, dtype=np.float32)
+    grid_w = np.arange(grid_size, dtype=np.float32)
+    grid = np.meshgrid(grid_w, grid_h)  # here w goes first
+    grid = np.stack(grid, axis=0)
+    grid = grid.reshape([2, 1, grid_size, grid_size])
+    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
+    if cls_token:
+        pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
+    return pos_embed
+def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
+    assert embed_dim % 2 == 0
+    # use half of dimensions to encode grid_h
+    emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0])  # (H*W, D/2)
+    emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1])  # (H*W, D/2)
+    emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D)
+    return emb
+def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
+    """
+    embed_dim: output dimension for each position
+    pos: a list of positions to be encoded: size (M,)
+    out: (M, D)
+    """
+    assert embed_dim % 2 == 0
+    omega = np.arange(embed_dim // 2, dtype=float)
+    omega /= embed_dim / 2.
+    omega = 1. / 10000**omega  # (D/2,)
+    pos = pos.reshape(-1)  # (M,)
+    out = np.einsum('m,d->md', pos, omega)  # (M, D/2), outer product
+    emb_sin = np.sin(out) # (M, D/2)
+    emb_cos = np.cos(out) # (M, D/2)
+    emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
+    return emb
+# --------------------------------------------------------
+# Interpolate position embeddings for high-resolution
+# References:
+# DeiT: https://github.com/facebookresearch/deit
+# --------------------------------------------------------
+def interpolate_pos_embed(model, checkpoint_model):
+    if 'pos_embed' in checkpoint_model:
+        pos_embed_checkpoint = checkpoint_model['pos_embed']
+        embedding_size = pos_embed_checkpoint.shape[-1]
+        num_patches = model.patch_embed.num_patches
+        num_extra_tokens = model.pos_embed.shape[-2] - num_patches
+        # height (== width) for the checkpoint position embedding
+        orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
+        # height (== width) for the new position embedding
+        new_size = int(num_patches ** 0.5)
+        # class_token and dist_token are kept unchanged
+        if orig_size != new_size:
+            print("Position interpolate from %dx%d to %dx%d" % (orig_size, orig_size, new_size, new_size))
+            extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
+            # only the position tokens are interpolated
+            pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
+            pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
+            pos_tokens = torch.nn.functional.interpolate(
+                pos_tokens, size=(new_size, new_size), mode='bicubic', align_corners=False)
+            pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
+            new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
+            checkpoint_model['pos_embed'] = new_pos_embed
+class MaskedAutoencoderViT(nn.Module):
+    """ Masked Autoencoder with VisionTransformer backbone
+    """
+    def __init__(self, img_size=224, patch_size=16, in_chans=3,
+                 embed_dim=1024, depth=24, num_heads=16,
+                 decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16,
+                 mlp_ratio=4., norm_layer=nn.LayerNorm, norm_pix_loss=False):
+        super().__init__()
+        # --------------------------------------------------------------------------
+        # MAE encoder specifics
+        self.patch_embed = PatchEmbed(img_size, patch_size, in_chans, embed_dim)
+        num_patches = self.patch_embed.num_patches
+        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
+        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim), requires_grad=False)  # fixed sin-cos embedding
+        self.blocks = nn.ModuleList([
+            # Block(embed_dim, num_heads, mlp_ratio, qkv_bias=True, qk_scale=None, norm_layer=norm_layer)
+            Block(embed_dim, num_heads, mlp_ratio, qkv_bias=True, norm_layer=norm_layer)
+            for i in range(depth)])
+        self.norm = norm_layer(embed_dim)
+        # --------------------------------------------------------------------------
+        # --------------------------------------------------------------------------
+        # MAE decoder specifics
+        self.decoder_embed = nn.Linear(embed_dim, decoder_embed_dim, bias=True)
+        self.mask_token = nn.Parameter(torch.zeros(1, 1, decoder_embed_dim))
+        self.decoder_pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, decoder_embed_dim), requires_grad=False)  # fixed sin-cos embedding
+        self.decoder_blocks = nn.ModuleList([
+            # Block(decoder_embed_dim, decoder_num_heads, mlp_ratio, qkv_bias=True, qk_scale=None, norm_layer=norm_layer)
+            Block(decoder_embed_dim, decoder_num_heads, mlp_ratio, qkv_bias=True, norm_layer=norm_layer)
+            for i in range(decoder_depth)])
+        self.decoder_norm = norm_layer(decoder_embed_dim)
+        self.decoder_pred = nn.Linear(decoder_embed_dim, patch_size**2 * in_chans, bias=True) # decoder to patch
+        # --------------------------------------------------------------------------
+        self.norm_pix_loss = norm_pix_loss
+        self.initialize_weights()
+    def initialize_weights(self):
+        # initialization
+        # initialize (and freeze) pos_embed by sin-cos embedding
+        pos_embed = get_2d_sincos_pos_embed(self.pos_embed.shape[-1], int(self.patch_embed.num_patches**.5), cls_token=True)
+        self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0))
+        decoder_pos_embed = get_2d_sincos_pos_embed(self.decoder_pos_embed.shape[-1], int(self.patch_embed.num_patches**.5), cls_token=True)
+        self.decoder_pos_embed.data.copy_(torch.from_numpy(decoder_pos_embed).float().unsqueeze(0))
+        # initialize patch_embed like nn.Linear (instead of nn.Conv2d)
+        w = self.patch_embed.proj.weight.data
+        torch.nn.init.xavier_uniform_(w.view([w.shape[0], -1]))
+        # timm's trunc_normal_(std=.02) is effectively normal_(std=0.02) as cutoff is too big (2.)
+        torch.nn.init.normal_(self.cls_token, std=.02)
+        torch.nn.init.normal_(self.mask_token, std=.02)
+        # initialize nn.Linear and nn.LayerNorm
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            # we use xavier_uniform following official JAX ViT:
+            torch.nn.init.xavier_uniform_(m.weight)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+    def patchify(self, imgs):
+        """
+        imgs: (N, 3, H, W)
+        x: (N, L, patch_size**2 *3)
+        """
+        p = self.patch_embed.patch_size[0]
+        assert imgs.shape[2] == imgs.shape[3] and imgs.shape[2] % p == 0
+        h = w = imgs.shape[2] // p
+        x = imgs.reshape(shape=(imgs.shape[0], 3, h, p, w, p))
+        x = torch.einsum('nchpwq->nhwpqc', x)
+        x = x.reshape(shape=(imgs.shape[0], h * w, p**2 * 3))
+        return x
+    def unpatchify(self, x):
+        """
+        x: (N, L, patch_size**2 *3)
+        imgs: (N, 3, H, W)
+        """
+        p = self.patch_embed.patch_size[0]
+        h = w = int(x.shape[1]**.5)
+        assert h * w == x.shape[1]
+        x = x.reshape(shape=(x.shape[0], h, w, p, p, 3))
+        x = torch.einsum('nhwpqc->nchpwq', x)
+        imgs = x.reshape(shape=(x.shape[0], 3, h * p, h * p))
+        return imgs
+    def random_masking(self, x, mask_ratio):
+        """
+        Perform per-sample random masking by per-sample shuffling.
+        Per-sample shuffling is done by argsort random noise.
+        x: [N, L, D], sequence
+        """
+        N, L, D = x.shape  # batch, length, dim
+        len_keep = int(L * (1 - mask_ratio))
+        noise = torch.rand(N, L, device=x.device)  # noise in [0, 1]
+        # sort noise for each sample
+        ids_shuffle = torch.argsort(noise, dim=1)  # ascend: small is keep, large is remove
+        ids_restore = torch.argsort(ids_shuffle, dim=1)
+        # keep the first subset
+        ids_keep = ids_shuffle[:, :len_keep]
+        x_masked = torch.gather(x, dim=1, index=ids_keep.unsqueeze(-1).repeat(1, 1, D))
+        # generate the binary mask: 0 is keep, 1 is remove
+        mask = torch.ones([N, L], device=x.device)
+        mask[:, :len_keep] = 0
+        # unshuffle to get the binary mask
+        mask = torch.gather(mask, dim=1, index=ids_restore)
+        return x_masked, mask, ids_restore
+    def forward_encoder(self, x, mask_ratio):
+        # embed patches
+        x = self.patch_embed(x)
+        # add pos embed w/o cls token
+        x = x + self.pos_embed[:, 1:, :]
+        # masking: length -> length * mask_ratio
+        x, mask, ids_restore = self.random_masking(x, mask_ratio)
+        # append cls token
+        cls_token = self.cls_token + self.pos_embed[:, :1, :]
+        cls_tokens = cls_token.expand(x.shape[0], -1, -1)
+        x = torch.cat((cls_tokens, x), dim=1)
+        # apply Transformer blocks
+        for blk in self.blocks:
+            x = blk(x)
+        x = self.norm(x)
+        return x, mask, ids_restore
+    def forward_decoder(self, x, ids_restore):
+        # embed tokens
+        x = self.decoder_embed(x)
+        # append mask tokens to sequence
+        mask_tokens = self.mask_token.repeat(x.shape[0], ids_restore.shape[1] + 1 - x.shape[1], 1)
+        x_ = torch.cat([x[:, 1:, :], mask_tokens], dim=1)  # no cls token
+        x_ = torch.gather(x_, dim=1, index=ids_restore.unsqueeze(-1).repeat(1, 1, x.shape[2]))  # unshuffle
+        x = torch.cat([x[:, :1, :], x_], dim=1)  # append cls token
+        # add pos embed
+        x = x + self.decoder_pos_embed
+        # apply Transformer blocks
+        for blk in self.decoder_blocks:
+            x = blk(x)
+        x = self.decoder_norm(x)
+        # predictor projection
+        x = self.decoder_pred(x)
+        # remove cls token
+        x = x[:, 1:, :]
+        return x
+    def forward_loss(self, imgs, pred, mask):
+        """
+        imgs: [N, 3, H, W]
+        pred: [N, L, p*p*3]
+        mask: [N, L], 0 is keep, 1 is remove,
+        """
+        target = self.patchify(imgs)
+        if self.norm_pix_loss:
+            mean = target.mean(dim=-1, keepdim=True)
+            var = target.var(dim=-1, keepdim=True)
+            target = (target - mean) / (var + 1.e-6)**.5
+        loss = (pred - target) ** 2
+        loss = loss.mean(dim=-1)  # [N, L], mean loss per patch
+        loss = (loss * mask).sum() / mask.sum()  # mean loss on removed patches
+        return loss
+    def forward(self, imgs, mask_ratio=0.75):
+        latent, mask, ids_restore = self.forward_encoder(imgs, mask_ratio)
+        pred = self.forward_decoder(latent, ids_restore)  # [N, L, p*p*3]
+        loss = self.forward_loss(imgs, pred, mask)
+        return loss, pred, mask
+def mae_vit_base_patch16_dec512d8b(**kwargs):
+    model = MaskedAutoencoderViT(
+        patch_size=16, embed_dim=768, depth=12, num_heads=12,
+        decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16,
+        mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
+    return model
+def mae_vit_large_patch16_dec512d8b(**kwargs):
+    model = MaskedAutoencoderViT(
+        patch_size=16, embed_dim=1024, depth=24, num_heads=16,
+        decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16,
+        mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
+    return model
+def mae_vit_huge_patch14_dec512d8b(**kwargs):
+    model = MaskedAutoencoderViT(
+        patch_size=14, embed_dim=1280, depth=32, num_heads=16,
+        decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16,
+        mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
+    return model
+# set recommended archs
+# Each short name below is bound to the same factory function as the longer
+# name on its right-hand side (no new model is created here).
+mae_vit_base_patch16 = mae_vit_base_patch16_dec512d8b  # decoder: 512 dim, 8 blocks
+mae_vit_large_patch16 = mae_vit_large_patch16_dec512d8b  # decoder: 512 dim, 8 blocks
+mae_vit_huge_patch14 = mae_vit_huge_patch14_dec512d8b  # decoder: 512 dim, 8 blocks
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+# --------------------------------------------------------
+# References:
+# timm: https://github.com/rwightman/pytorch-image-models/tree/master/timm
+# DeiT: https://github.com/facebookresearch/deit
+# --------------------------------------------------------
+from functools import partial
+import torch
+import torch.nn as nn
+import timm.models.vision_transformer
+class VisionTransformer(timm.models.vision_transformer.VisionTransformer):
+    """ Vision Transformer with support for global average pooling
+    """
+    def __init__(self, global_pool=False, **kwargs):
+        super(VisionTransformer, self).__init__(**kwargs)
+        self.global_pool = global_pool
+        if self.global_pool:
+            norm_layer = kwargs['norm_layer']
+            embed_dim = kwargs['embed_dim']
+            self.fc_norm = norm_layer(embed_dim)
+            del self.norm  # remove the original norm
+    def forward_features(self, x):
+        B = x.shape[0]
+        x = self.patch_embed(x)
+        cls_tokens = self.cls_token.expand(B, -1, -1)  # stole cls_tokens impl from Phil Wang, thanks
+        x = torch.cat((cls_tokens, x), dim=1)
+        x = x + self.pos_embed
+        x = self.pos_drop(x)
+        for blk in self.blocks:
+            x = blk(x)
+        if self.global_pool:
+            x = x[:, 1:, :].mean(dim=1)  # global pool without cls token
+            outcome = self.fc_norm(x)
+        else:
+            x = self.norm(x)
+            outcome = x[:, 0]
+        return outcome
+def vit_base_patch16(**kwargs):
+    model = VisionTransformer(
+        patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
+    return model
+def vit_large_patch16(**kwargs):
+    model = VisionTransformer(
+        patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
+    return model
+def vit_huge_patch14(**kwargs):
+    model = VisionTransformer(
+        patch_size=14, embed_dim=1280, depth=32, num_heads=16, mlp_ratio=4, qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
+    return model

models/vit/mae_gaze.py ADDED Viewed

	@@ -0,0 +1,69 @@

+from os import replace
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.optim.lr_scheduler import StepLR
+import torch.utils.model_zoo as model_zoo
+from torch.utils.model_zoo import load_url as load_state_dict_from_url
+from functools import partial
+from torchvision.models import vit_b_16, vit_b_32, vit_l_16, vit_l_32
+from models.vit.mae import interpolate_pos_embed, MaskedAutoencoderViT, vit_base_patch16, vit_large_patch16, vit_huge_patch14
+class MAE_Gaze(nn.Module):
+	def __init__(self, model_type='vit_b_16', global_pool=False, drop_path_rate=0.1,
+			  custom_pretrained_path=None):
+		super().__init__()
+		if model_type == "vit_b_16":
+			self.vit = vit_base_patch16( global_pool=global_pool, drop_path_rate=drop_path_rate)
+		elif model_type == "vit_l_16":
+			self.vit = vit_large_patch16( global_pool=global_pool, drop_path_rate=drop_path_rate)
+		elif model_type == "vit_h_14":
+			self.vit = vit_huge_patch14( global_pool=global_pool, drop_path_rate=drop_path_rate)
+		else:
+			raise ValueError('model_type not supported')
+		if custom_pretrained_path is not None:
+			checkpoint_model = torch.load(custom_pretrained_path, map_location='cpu')['model']
+			state_dict = self.vit.state_dict()
+			for k in  ['head.weight', 'head.bias']:
+				if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape:
+					print(f"Removing key {k} from pretrained checkpoint")
+					del checkpoint_model[k]
+			# interpolate position embedding
+			interpolate_pos_embed(self.vit, checkpoint_model)
+			# keys_in_ckpt = checkpoint_model.keys()
+			# print('Keys in ckpt: ', keys_in_ckpt)
+			self.vit.load_state_dict( checkpoint_model, strict=False)
+			print('Loaded custom pretrained weights from {}'.format(custom_pretrained_path))
+		# del self.decoder_embed
+		# del self.mask_token
+		# del self.decoder_pos_embed
+		# del self.decoder_blocks
+		# del self.decoder_norm
+		# del self.decoder_pred
+		embed_dim = self.vit.embed_dim
+		self.gaze_fc = nn.Linear(embed_dim, 2)
+	def forward(self, input):
+		features = self.vit.forward_features(input)
+		pred_gaze = self.gaze_fc(features)
+		output_dict = {}
+		output_dict['pred_gaze'] = pred_gaze
+		return output_dict

models/vit/vit_gaze.py ADDED Viewed

	@@ -0,0 +1,103 @@

+from os import replace
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.optim.lr_scheduler import StepLR
+import torch.utils.model_zoo as model_zoo
+from torch.utils.model_zoo import load_url as load_state_dict_from_url
+from functools import partial
+from torchvision.models import vit_b_16, vit_b_32, vit_l_16, vit_l_32
+class ViTGaze(nn.Module):
+	def __init__(self,
+				vit_type="b_16",
+				pretrained=True,
+				custom_pretrained_path=None,
+				**kwargs
+				):
+		super().__init__()
+		if vit_type == "b_16":
+			"""
+			patch_size=16,
+			num_layers=12,
+			num_heads=12,
+			hidden_dim=768,
+			mlp_dim=3072,
+			"""
+			self.vit = vit_b_16(pretrained=pretrained )
+			self.vit.heads = nn.Sequential(
+				nn.Linear(768,2)
+			)
+		elif vit_type == "b_32":
+			self.vit = vit_b_32(pretrained=pretrained)
+			self.vit.heads = nn.Sequential(
+				nn.Linear(768,2)
+			)
+		elif vit_type == "l_16":
+			self.vit = vit_l_16(pretrained=pretrained)
+			self.vit.heads = nn.Sequential(
+				nn.Linear(1024,2)
+			)
+		elif vit_type == "l_32":
+			self.vit = vit_l_32(pretrained=pretrained)
+			self.vit.heads = nn.Sequential(
+				nn.Linear(1024,2)
+			)
+		if custom_pretrained_path is not None:
+			ckpt = torch.load(custom_pretrained_path)
+			print('Loading custom pretrained weights from: ', custom_pretrained_path)
+			# self.vit.load_state_dict( ckpt['model'], strict=True)
+			keys_in_ckpt = ckpt.keys()
+			print('Keys in ckpt: ', keys_in_ckpt)
+			self.vit.load_state_dict( ckpt, strict=True)
+	def forward(self, x_in):
+		out_dict = {}
+		gaze = self.vit(x_in)
+		out_dict['pred_gaze'] = gaze
+		return out_dict
+from models.vit.mae import interpolate_pos_embed, vit_huge_patch14
+class CustomViT_H14(nn.Module):
+	def __init__(self, global_pool=False, drop_path_rate=0.1,
+			  custom_pretrained_path=None):
+		super().__init__()
+		self.vit = vit_huge_patch14( global_pool=global_pool, drop_path_rate=drop_path_rate)
+		if custom_pretrained_path is not None:
+			checkpoint_model = torch.load(custom_pretrained_path, map_location='cpu')
+			state_dict = self.vit.state_dict()
+			for k in  ['head.weight', 'head.bias']:
+				if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape:
+					print(f"Removing key {k} from pretrained checkpoint")
+					del checkpoint_model[k]
+			# interpolate position embedding
+			interpolate_pos_embed(self.vit, checkpoint_model)
+			self.vit.load_state_dict( checkpoint_model, strict=False )
+			print('Loaded custom pretrained weights from {}'.format(custom_pretrained_path))
+		embed_dim = self.vit.embed_dim
+		self.gaze_fc = nn.Linear(embed_dim, 2)
+	def forward(self, input):
+		features = self.vit.forward_features(input)
+		pred_gaze = self.gaze_fc(features)
+		output_dict = {}
+		output_dict['pred_gaze'] = pred_gaze
+		return output_dict

unigaze/__init__.py ADDED Viewed

File without changes

unigaze/configs/config.yaml ADDED Viewed

	@@ -0,0 +1,38 @@

+defaults:
+  - _self_
+  - exp: exp_224
+mode: train
+random_seed: 42
+num_workers: 20
+test_per_epoch: 1
+print_freq: 100
+data_sanity_check: false
+log_wandb: false
+output_dir: "./logs"
+ckpt_resume: null
+pretrain_ckptpath: null
+optimizer_cfg: null
+scheduler_cfg: null
+batch_size: 50
+test_batch_size: 200
+epochs: 25
+valid_epoch: 1
+eval_epoch: 10
+save_epoch: 10
+use_autocast: False
+batchnorm_type:
+  label: clean
+  unlabel: aug
+  test: clean

unigaze/configs/data/eyediap_cs.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+type: datasets.eyediap.EYEDIAPDataset
+params:
+  data_name: eyediap_cs
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  dataset_path: null
+  keys_to_use:
+    - 'person_1.h5'
+    - 'person_2.h5'
+    - 'person_3.h5'
+    - 'person_4.h5'
+    - 'person_5.h5'
+    - 'person_6.h5'
+    - 'person_7.h5'
+    - 'person_8.h5'
+    - 'person_9.h5'
+    - 'person_10.h5'
+    - 'person_11.h5'
+    - 'person_14.h5'
+    - 'person_15.h5'
+    - 'person_16.h5'

unigaze/configs/data/eyediap_cs_test.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+type: datasets.eyediap.EYEDIAPDataset
+params:
+  data_name: eyediap_cs
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  dataset_path: null
+  keys_to_use:
+    # - 'person_1.h5'
+    # - 'person_2.h5'
+    # - 'person_3.h5'
+    # - 'person_4.h5'
+    # - 'person_5.h5'
+    # - 'person_6.h5'
+    # - 'person_7.h5'
+    # - 'person_8.h5'
+    - 'person_9.h5'
+    - 'person_10.h5'
+    - 'person_11.h5'
+    - 'person_14.h5'
+    - 'person_15.h5'
+    - 'person_16.h5'

unigaze/configs/data/eyediap_cs_train.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+type: datasets.eyediap.EYEDIAPDataset
+params:
+  data_name: eyediap_cs
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  dataset_path: null
+  keys_to_use:
+    - 'person_1.h5'
+    - 'person_2.h5'
+    - 'person_3.h5'
+    - 'person_4.h5'
+    - 'person_5.h5'
+    - 'person_6.h5'
+    - 'person_7.h5'
+    - 'person_8.h5'
+    # - 'person_9.h5'
+    # - 'person_10.h5'
+    # - 'person_11.h5'
+    # - 'person_14.h5'
+    # - 'person_15.h5'
+    # - 'person_16.h5'

unigaze/configs/data/eyediap_ft.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+type: datasets.eyediap.EYEDIAPDataset
+params:
+  data_name: eyediap_ft
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  dataset_path: null
+  keys_to_use:
+    - 'person_1.h5'
+    - 'person_2.h5'
+    - 'person_3.h5'
+    - 'person_4.h5'
+    - 'person_5.h5'
+    - 'person_6.h5'
+    - 'person_7.h5'
+    - 'person_8.h5'
+    - 'person_9.h5'
+    - 'person_10.h5'
+    - 'person_11.h5'
+    - 'person_12.h5'
+    - 'person_13.h5'
+    - 'person_14.h5'
+    - 'person_15.h5'
+    - 'person_16.h5'

unigaze/configs/data/eyediap_ft_test.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+type: datasets.eyediap.EYEDIAPDataset
+params:
+  data_name: eyediap_ft
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  dataset_path: null
+  keys_to_use:
+    # - 'person_1.h5'
+    # - 'person_2.h5'
+    # - 'person_3.h5'
+    # - 'person_4.h5'
+    # - 'person_5.h5'
+    # - 'person_6.h5'
+    # - 'person_7.h5'
+    # - 'person_8.h5'
+    - 'person_9.h5'
+    - 'person_10.h5'
+    - 'person_11.h5'
+    - 'person_12.h5'
+    - 'person_13.h5'
+    - 'person_14.h5'
+    - 'person_15.h5'
+    - 'person_16.h5'

unigaze/configs/data/eyediap_ft_train.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+type: datasets.eyediap.EYEDIAPDataset
+params:
+  data_name: eyediap_ft
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  dataset_path: null
+  keys_to_use:
+    - 'person_1.h5'
+    - 'person_2.h5'
+    - 'person_3.h5'
+    - 'person_4.h5'
+    - 'person_5.h5'
+    - 'person_6.h5'
+    - 'person_7.h5'
+    - 'person_8.h5'
+    # - 'person_9.h5'
+    # - 'person_10.h5'
+    # - 'person_11.h5'
+    # - 'person_12.h5'
+    # - 'person_13.h5'
+    # - 'person_14.h5'
+    # - 'person_15.h5'
+    # - 'person_16.h5'

unigaze/configs/data/gaze360_test.yaml ADDED Viewed

	@@ -0,0 +1,74 @@

+type: datasets.gaze360.Gaze360Dataset
+params:
+  data_name: gaze360_224_test
+  saved_norm_config:
+    focal_norm: 960
+    distance_norm: 600
+    roi_size: [224, 224]
+  norm_config:
+      focal_norm: 960
+      distance_norm: 600
+      roi_size: [224, 224]
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  image_size: 224
+  sample_rate_use: 1
+  whether_crop_resize: False
+  keys_to_use:
+    - 000000.h5
+    - 000001.h5
+    - 000002.h5
+    - 000003.h5
+    - 000004.h5
+    - 000010.h5
+    - 000014.h5
+    - 000022.h5
+    - 000031.h5
+    - 000032.h5
+    - 000044.h5
+    - 000045.h5
+    - 000057.h5
+    - 000058.h5
+    - 000070.h5
+    - 000078.h5
+    - 000270.h5
+    - 000277.h5
+    - 000278.h5
+    - 000279.h5
+    - 000316.h5
+    - 000364.h5
+    - 000367.h5
+    - 000511.h5
+    - 000512.h5
+    - 000513.h5
+    - 000515.h5
+    - 000527.h5
+    - 000536.h5
+    - 000543.h5
+    - 000579.h5
+    - 000584.h5
+    - 000585.h5
+    - 000600.h5
+    - 000603.h5
+    - 000604.h5
+    - 000611.h5
+    - 000614.h5
+    - 000615.h5
+    - 000616.h5
+    - 000649.h5
+    - 000650.h5
+    - 000651.h5
+    - 000652.h5
+    - 000687.h5
+    - 000723.h5
+    - 000777.h5
+    - 000782.h5
+    - 000823.h5
+    - 000907.h5
+    - 000909.h5
+    - 000982.h5

unigaze/configs/data/gaze360_train.yaml ADDED Viewed

	@@ -0,0 +1,252 @@

+type: datasets.gaze360.Gaze360Dataset
+params:
+  data_name: gaze360_224_train
+  saved_norm_config:
+    focal_norm: 960
+    distance_norm: 600
+    roi_size: [224, 224]
+  norm_config:
+      focal_norm: 960
+      distance_norm: 600
+      roi_size: [224, 224]
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  image_size: 224
+  sample_rate_use: 1
+  keys_to_use:
+    - 000000.h5
+    - 000001.h5
+    - 000002.h5
+    - 000003.h5
+    - 000004.h5
+    - 000006.h5
+    - 000007.h5
+    - 000009.h5
+    - 000010.h5
+    - 000011.h5
+    - 000013.h5
+    - 000016.h5
+    - 000019.h5
+    - 000020.h5
+    - 000029.h5
+    - 000030.h5
+    - 000031.h5
+    - 000032.h5
+    - 000034.h5
+    - 000035.h5
+    - 000038.h5
+    - 000039.h5
+    - 000043.h5
+    - 000048.h5
+    - 000049.h5
+    - 000050.h5
+    - 000058.h5
+    - 000060.h5
+    - 000061.h5
+    - 000062.h5
+    - 000063.h5
+    - 000072.h5
+    - 000073.h5
+    - 000074.h5
+    - 000075.h5
+    - 000076.h5
+    - 000077.h5
+    - 000081.h5
+    - 000083.h5
+    - 000084.h5
+    - 000085.h5
+    - 000090.h5
+    - 000093.h5
+    - 000094.h5
+    - 000099.h5
+    - 000109.h5
+    - 000111.h5
+    - 000112.h5
+    - 000116.h5
+    - 000122.h5
+    - 000134.h5
+    - 000146.h5
+    - 000148.h5
+    - 000149.h5
+    - 000150.h5
+    - 000151.h5
+    - 000152.h5
+    - 000154.h5
+    - 000156.h5
+    - 000158.h5
+    - 000159.h5
+    - 000160.h5
+    - 000161.h5
+    - 000162.h5
+    - 000165.h5
+    - 000166.h5
+    - 000170.h5
+    - 000171.h5
+    - 000172.h5
+    - 000184.h5
+    - 000185.h5
+    - 000186.h5
+    - 000187.h5
+    - 000188.h5
+    - 000189.h5
+    - 000190.h5
+    - 000202.h5
+    - 000205.h5
+    - 000206.h5
+    - 000208.h5
+    - 000214.h5
+    - 000216.h5
+    - 000217.h5
+    - 000219.h5
+    - 000220.h5
+    - 000221.h5
+    - 000222.h5
+    - 000228.h5
+    - 000237.h5
+    - 000248.h5
+    - 000250.h5
+    - 000255.h5
+    - 000256.h5
+    - 000257.h5
+    - 000258.h5
+    - 000262.h5
+    - 000278.h5
+    - 000283.h5
+    - 000284.h5
+    - 000287.h5
+    - 000288.h5
+    - 000297.h5
+    - 000298.h5
+    - 000299.h5
+    - 000300.h5
+    - 000324.h5
+    - 000368.h5
+    - 000369.h5
+    - 000408.h5
+    - 000409.h5
+    - 000410.h5
+    - 000418.h5
+    - 000440.h5
+    - 000441.h5
+    - 000449.h5
+    - 000457.h5
+    - 000458.h5
+    - 000459.h5
+    - 000460.h5
+    - 000461.h5
+    - 000463.h5
+    - 000494.h5
+    - 000501.h5
+    - 000502.h5
+    - 000509.h5
+    - 000510.h5
+    - 000511.h5
+    - 000512.h5
+    - 000513.h5
+    - 000514.h5
+    - 000515.h5
+    - 000517.h5
+    - 000519.h5
+    - 000529.h5
+    - 000541.h5
+    - 000547.h5
+    - 000548.h5
+    - 000549.h5
+    - 000550.h5
+    - 000551.h5
+    - 000552.h5
+    - 000565.h5
+    - 000566.h5
+    - 000569.h5
+    - 000571.h5
+    - 000573.h5
+    - 000574.h5
+    - 000586.h5
+    - 000587.h5
+    - 000588.h5
+    - 000589.h5
+    - 000592.h5
+    - 000597.h5
+    - 000603.h5
+    - 000604.h5
+    - 000605.h5
+    - 000611.h5
+    - 000613.h5
+    - 000617.h5
+    - 000620.h5
+    - 000623.h5
+    - 000634.h5
+    - 000635.h5
+    - 000636.h5
+    - 000639.h5
+    - 000640.h5
+    - 000641.h5
+    - 000642.h5
+    - 000643.h5
+    - 000644.h5
+    - 000645.h5
+    - 000650.h5
+    - 000656.h5
+    - 000658.h5
+    - 000659.h5
+    - 000660.h5
+    - 000661.h5
+    - 000662.h5
+    - 000670.h5
+    - 000671.h5
+    - 000677.h5
+    - 000683.h5
+    - 000714.h5
+    - 000721.h5
+    - 000723.h5
+    - 000738.h5
+    - 000741.h5
+    - 000742.h5
+    - 000744.h5
+    - 000751.h5
+    - 000755.h5
+    - 000761.h5
+    - 000762.h5
+    - 000763.h5
+    - 000764.h5
+    - 000765.h5
+    - 000768.h5
+    - 000777.h5
+    - 000779.h5
+    - 000780.h5
+    - 000781.h5
+    - 000783.h5
+    - 000786.h5
+    - 000787.h5
+    - 000789.h5
+    - 000800.h5
+    - 000801.h5
+    - 000802.h5
+    - 000803.h5
+    - 000813.h5
+    - 000815.h5
+    - 000816.h5
+    - 000831.h5
+    - 000834.h5
+    - 000835.h5
+    - 000838.h5
+    - 000861.h5
+    - 000862.h5
+    - 000899.h5
+    - 000900.h5
+    - 000916.h5
+    - 000918.h5
+    - 000923.h5
+    - 000935.h5
+    - 000946.h5
+    - 000971.h5
+    - 000978.h5
+    - 000990.h5
+    - 000991.h5
+    - 001092.h5

unigaze/configs/data/gazecapture_test.yaml ADDED Viewed

	@@ -0,0 +1,15 @@

+type: datasets.gazecapture.GazeCaptureDataset
+params:
+  data_name: gazecapture_test
+  color_type: rgb
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  dataset_path: null
+  keys_to_use: [ "00010.h5", "00110.h5", "00126.h5", "00178.h5", "00190.h5", "00192.h5", "00220.h5", "00222.h5", "00233.h5", "00319.h5", "00330.h5", "00343.h5", "00382.h5", "00460.h5", "00509.h5", "00511.h5", "00546.h5", "00563.h5", "00580.h5", "00585.h5",
+    "00611.h5", "00616.h5", "00619.h5", "00646.h5", "00654.h5", "00680.h5", "00686.h5", "00700.h5", "00721.h5", "00741.h5", "00777.h5", "00796.h5", "00868.h5", "00921.h5", "00935.h5", "00949.h5", "00953.h5",
+    "00965.h5", "00968.h5", "01036.h5", "01041.h5", "01051.h5", "01091.h5", "01148.h5", "01152.h5", "01155.h5", "01183.h5", "01200.h5", "01273.h5", "01278.h5", "01286.h5", "01326.h5", "01329.h5", "01370.h5", "01376.h5", "01425.h5", "01457.h5", "01477.h5",
+    "01506.h5", "01517.h5", "01525.h5", "01575.h5", "01625.h5", "01672.h5", "01674.h5", "01689.h5", "01782.h5", "01794.h5", "01813.h5", "01830.h5", "01855.h5", "01863.h5", "01877.h5", "01893.h5", "01941.h5", "01959.h5", "01978.h5",
+    "01983.h5", "01985.h5", "01997.h5", "02006.h5", "02020.h5", "02043.h5", "02078.h5", "02091.h5", "02109.h5", "02197.h5", "02213.h5", "02239.h5", "02240.h5", "02269.h5", "02275.h5", "02281.h5", "02292.h5",
+    "02301.h5", "02348.h5", "02413.h5", "02419.h5", "02450.h5", "02455.h5", "02461.h5", "02480.h5", "02536.h5", "02601.h5", "02734.h5", "02755.h5", "02756.h5", "02805.h5", "02833.h5", "02851.h5", "02885.h5", "02899.h5", "02942.h5", "02966.h5",
+    "02986.h5", "03011.h5", "03024.h5", "03043.h5", "03117.h5", "03126.h5", "03140.h5", "03177.h5", "03183.h5", "03185.h5", "03202.h5", "03216.h5", "03223.h5", "03247.h5", "03270.h5", "03324.h5", "03326.h5", "03344.h5", "03352.h5", "03361.h5", "03366.h5",
+    "03404.h5", "03412.h5", "03451.h5", "03523.h5"]

unigaze/configs/data/gazecapture_test_ds15.yaml ADDED Viewed

	@@ -0,0 +1,16 @@

+type: datasets.gazecapture.GazeCaptureDataset
+params:
+  data_name: gazecapture_test
+  color_type: rgb
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  sample_rate_use: 15
+  dataset_path: null
+  keys_to_use: [ "00010.h5", "00110.h5", "00126.h5", "00178.h5", "00190.h5", "00192.h5", "00220.h5", "00222.h5", "00233.h5", "00319.h5", "00330.h5", "00343.h5", "00382.h5", "00460.h5", "00509.h5", "00511.h5", "00546.h5", "00563.h5", "00580.h5", "00585.h5",
+    "00611.h5", "00616.h5", "00619.h5", "00646.h5", "00654.h5", "00680.h5", "00686.h5", "00700.h5", "00721.h5", "00741.h5", "00777.h5", "00796.h5", "00868.h5", "00921.h5", "00935.h5", "00949.h5", "00953.h5",
+    "00965.h5", "00968.h5", "01036.h5", "01041.h5", "01051.h5", "01091.h5", "01148.h5", "01152.h5", "01155.h5", "01183.h5", "01200.h5", "01273.h5", "01278.h5", "01286.h5", "01326.h5", "01329.h5", "01370.h5", "01376.h5", "01425.h5", "01457.h5", "01477.h5",
+    "01506.h5", "01517.h5", "01525.h5", "01575.h5", "01625.h5", "01672.h5", "01674.h5", "01689.h5", "01782.h5", "01794.h5", "01813.h5", "01830.h5", "01855.h5", "01863.h5", "01877.h5", "01893.h5", "01941.h5", "01959.h5", "01978.h5",
+    "01983.h5", "01985.h5", "01997.h5", "02006.h5", "02020.h5", "02043.h5", "02078.h5", "02091.h5", "02109.h5", "02197.h5", "02213.h5", "02239.h5", "02240.h5", "02269.h5", "02275.h5", "02281.h5", "02292.h5",
+    "02301.h5", "02348.h5", "02413.h5", "02419.h5", "02450.h5", "02455.h5", "02461.h5", "02480.h5", "02536.h5", "02601.h5", "02734.h5", "02755.h5", "02756.h5", "02805.h5", "02833.h5", "02851.h5", "02885.h5", "02899.h5", "02942.h5", "02966.h5",
+    "02986.h5", "03011.h5", "03024.h5", "03043.h5", "03117.h5", "03126.h5", "03140.h5", "03177.h5", "03183.h5", "03185.h5", "03202.h5", "03216.h5", "03223.h5", "03247.h5", "03270.h5", "03324.h5", "03326.h5", "03344.h5", "03352.h5", "03361.h5", "03366.h5",
+    "03404.h5", "03412.h5", "03451.h5", "03523.h5"]

unigaze/configs/data/gazecapture_train.yaml ADDED Viewed

	@@ -0,0 +1,1189 @@

+type: datasets.gazecapture.GazeCaptureDataset
+params:
+  data_name: gazecapture_train_224
+  color_type: rgb
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  dataset_path: null
+  sample_rate_use: 1
+  keys_to_use:
+    - 00002.h5
+    - 00003.h5
+    - 00005.h5
+    - 00006.h5
+    - 00024.h5
+    - 00028.h5
+    - 00033.h5
+    - 00034.h5
+    - 00087.h5
+    - 00089.h5
+    - 00097.h5
+    - 00098.h5
+    - 00099.h5
+    - 00102.h5
+    - 00103.h5
+    - 00104.h5
+    - 00114.h5
+    - 00120.h5
+    - 00121.h5
+    - 00122.h5
+    - 00123.h5
+    - 00127.h5
+    - 00128.h5
+    - 00130.h5
+    - 00132.h5
+    - 00137.h5
+    - 00138.h5
+    - 00139.h5
+    - 00140.h5
+    - 00141.h5
+    - 00142.h5
+    - 00143.h5
+    - 00144.h5
+    - 00145.h5
+    - 00146.h5
+    - 00148.h5
+    - 00149.h5
+    - 00150.h5
+    - 00153.h5
+    - 00154.h5
+    - 00156.h5
+    - 00162.h5
+    - 00164.h5
+    - 00165.h5
+    - 00173.h5
+    - 00179.h5
+    - 00191.h5
+    - 00194.h5
+    - 00200.h5
+    - 00202.h5
+    - 00208.h5
+    - 00209.h5
+    - 00210.h5
+    - 00211.h5
+    - 00212.h5
+    - 00214.h5
+    - 00218.h5
+    - 00221.h5
+    - 00224.h5
+    - 00225.h5
+    - 00226.h5
+    - 00227.h5
+    - 00228.h5
+    - 00232.h5
+    - 00234.h5
+    - 00236.h5
+    - 00237.h5
+    - 00238.h5
+    - 00239.h5
+    - 00240.h5
+    - 00241.h5
+    - 00243.h5
+    - 00245.h5
+    - 00247.h5
+    - 00249.h5
+    - 00268.h5
+    - 00269.h5
+    - 00273.h5
+    - 00274.h5
+    - 00285.h5
+    - 00288.h5
+    - 00289.h5
+    - 00295.h5
+    - 00296.h5
+    - 00299.h5
+    - 00300.h5
+    - 00303.h5
+    - 00304.h5
+    - 00305.h5
+    - 00307.h5
+    - 00309.h5
+    - 00310.h5
+    - 00311.h5
+    - 00312.h5
+    - 00317.h5
+    - 00324.h5
+    - 00325.h5
+    - 00326.h5
+    - 00331.h5
+    - 00332.h5
+    - 00339.h5
+    - 00342.h5
+    - 00351.h5
+    - 00354.h5
+    - 00355.h5
+    - 00356.h5
+    - 00357.h5
+    - 00358.h5
+    - 00359.h5
+    - 00363.h5
+    - 00376.h5
+    - 00377.h5
+    - 00459.h5
+    - 00465.h5
+    - 00466.h5
+    - 00467.h5
+    - 00469.h5
+    - 00472.h5
+    - 00473.h5
+    - 00475.h5
+    - 00477.h5
+    - 00480.h5
+    - 00481.h5
+    - 00487.h5
+    - 00488.h5
+    - 00491.h5
+    - 00492.h5
+    - 00493.h5
+    - 00494.h5
+    - 00495.h5
+    - 00496.h5
+    - 00499.h5
+    - 00501.h5
+    - 00503.h5
+    - 00505.h5
+    - 00510.h5
+    - 00512.h5
+    - 00513.h5
+    - 00514.h5
+    - 00518.h5
+    - 00519.h5
+    - 00520.h5
+    - 00522.h5
+    - 00525.h5
+    - 00531.h5
+    - 00533.h5
+    - 00534.h5
+    - 00535.h5
+    - 00539.h5
+    - 00540.h5
+    - 00542.h5
+    - 00544.h5
+    - 00545.h5
+    - 00548.h5
+    - 00550.h5
+    - 00553.h5
+    - 00554.h5
+    - 00555.h5
+    - 00560.h5
+    - 00562.h5
+    - 00565.h5
+    - 00566.h5
+    - 00569.h5
+    - 00572.h5
+    - 00574.h5
+    - 00575.h5
+    - 00578.h5
+    - 00581.h5
+    - 00584.h5
+    - 00588.h5
+    - 00590.h5
+    - 00599.h5
+    - 00600.h5
+    - 00601.h5
+    - 00602.h5
+    - 00605.h5
+    - 00606.h5
+    - 00607.h5
+    - 00610.h5
+    - 00613.h5
+    - 00617.h5
+    - 00621.h5
+    - 00622.h5
+    - 00623.h5
+    - 00624.h5
+    - 00626.h5
+    - 00627.h5
+    - 00632.h5
+    - 00633.h5
+    - 00634.h5
+    - 00636.h5
+    - 00638.h5
+    - 00641.h5
+    - 00642.h5
+    - 00643.h5
+    - 00644.h5
+    - 00645.h5
+    - 00649.h5
+    - 00650.h5
+    - 00658.h5
+    - 00661.h5
+    - 00663.h5
+    - 00666.h5
+    - 00667.h5
+    - 00668.h5
+    - 00669.h5
+    - 00670.h5
+    - 00672.h5
+    - 00675.h5
+    - 00676.h5
+    - 00677.h5
+    - 00678.h5
+    - 00679.h5
+    - 00682.h5
+    - 00683.h5
+    - 00687.h5
+    - 00688.h5
+    - 00690.h5
+    - 00691.h5
+    - 00693.h5
+    - 00694.h5
+    - 00695.h5
+    - 00699.h5
+    - 00704.h5
+    - 00706.h5
+    - 00707.h5
+    - 00710.h5
+    - 00711.h5
+    - 00712.h5
+    - 00714.h5
+    - 00716.h5
+    - 00718.h5
+    - 00719.h5
+    - 00722.h5
+    - 00728.h5
+    - 00729.h5
+    - 00730.h5
+    - 00731.h5
+    - 00732.h5
+    - 00733.h5
+    - 00737.h5
+    - 00742.h5
+    - 00743.h5
+    - 00745.h5
+    - 00747.h5
+    - 00749.h5
+    - 00750.h5
+    - 00752.h5
+    - 00753.h5
+    - 00755.h5
+    - 00756.h5
+    - 00757.h5
+    - 00764.h5
+    - 00765.h5
+    - 00767.h5
+    - 00771.h5
+    - 00772.h5
+    - 00773.h5
+    - 00774.h5
+    - 00775.h5
+    - 00789.h5
+    - 00790.h5
+    - 00791.h5
+    - 00795.h5
+    - 00798.h5
+    - 00801.h5
+    - 00802.h5
+    - 00804.h5
+    - 00806.h5
+    - 00807.h5
+    - 00810.h5
+    - 00811.h5
+    - 00812.h5
+    - 00814.h5
+    - 00818.h5
+    - 00819.h5
+    - 00820.h5
+    - 00821.h5
+    - 00823.h5
+    - 00825.h5
+    - 00827.h5
+    - 00831.h5
+    - 00832.h5
+    - 00833.h5
+    - 00835.h5
+    - 00837.h5
+    - 00840.h5
+    - 00841.h5
+    - 00842.h5
+    - 00849.h5
+    - 00850.h5
+    - 00851.h5
+    - 00852.h5
+    - 00853.h5
+    - 00855.h5
+    - 00859.h5
+    - 00864.h5
+    - 00865.h5
+    - 00869.h5
+    - 00872.h5
+    - 00873.h5
+    - 00874.h5
+    - 00875.h5
+    - 00878.h5
+    - 00881.h5
+    - 00882.h5
+    - 00886.h5
+    - 00888.h5
+    - 00889.h5
+    - 00891.h5
+    - 00892.h5
+    - 00894.h5
+    - 00896.h5
+    - 00897.h5
+    - 00898.h5
+    - 00899.h5
+    - 00900.h5
+    - 00904.h5
+    - 00905.h5
+    - 00907.h5
+    - 00911.h5
+    - 00912.h5
+    - 00914.h5
+    - 00915.h5
+    - 00923.h5
+    - 00924.h5
+    - 00927.h5
+    - 00931.h5
+    - 00933.h5
+    - 00934.h5
+    - 00938.h5
+    - 00944.h5
+    - 00945.h5
+    - 00947.h5
+    - 00948.h5
+    - 00956.h5
+    - 00961.h5
+    - 00963.h5
+    - 00969.h5
+    - 00971.h5
+    - 00974.h5
+    - 00980.h5
+    - 00981.h5
+    - 00982.h5
+    - 00983.h5
+    - 00984.h5
+    - 00986.h5
+    - 00989.h5
+    - 00991.h5
+    - 00992.h5
+    - 00997.h5
+    - 00999.h5
+    - 01000.h5
+    - 01002.h5
+    - 01003.h5
+    - 01009.h5
+    - 01010.h5
+    - 01012.h5
+    - 01015.h5
+    - 01018.h5
+    - 01019.h5
+    - 01020.h5
+    - 01021.h5
+    - 01022.h5
+    - 01024.h5
+    - 01025.h5
+    - 01031.h5
+    - 01032.h5
+    - 01034.h5
+    - 01035.h5
+    - 01038.h5
+    - 01039.h5
+    - 01042.h5
+    - 01044.h5
+    - 01045.h5
+    - 01046.h5
+    - 01050.h5
+    - 01052.h5
+    - 01054.h5
+    - 01055.h5
+    - 01056.h5
+    - 01057.h5
+    - 01058.h5
+    - 01059.h5
+    - 01060.h5
+    - 01062.h5
+    - 01063.h5
+    - 01064.h5
+    - 01065.h5
+    - 01069.h5
+    - 01070.h5
+    - 01073.h5
+    - 01075.h5
+    - 01076.h5
+    - 01077.h5
+    - 01080.h5
+    - 01081.h5
+    - 01082.h5
+    - 01083.h5
+    - 01084.h5
+    - 01085.h5
+    - 01086.h5
+    - 01087.h5
+    - 01088.h5
+    - 01089.h5
+    - 01090.h5
+    - 01092.h5
+    - 01093.h5
+    - 01095.h5
+    - 01100.h5
+    - 01102.h5
+    - 01104.h5
+    - 01105.h5
+    - 01106.h5
+    - 01107.h5
+    - 01110.h5
+    - 01118.h5
+    - 01120.h5
+    - 01121.h5
+    - 01123.h5
+    - 01127.h5
+    - 01128.h5
+    - 01129.h5
+    - 01135.h5
+    - 01138.h5
+    - 01139.h5
+    - 01143.h5
+    - 01145.h5
+    - 01146.h5
+    - 01147.h5
+    - 01149.h5
+    - 01151.h5
+    - 01156.h5
+    - 01157.h5
+    - 01158.h5
+    - 01161.h5
+    - 01162.h5
+    - 01163.h5
+    - 01164.h5
+    - 01165.h5
+    - 01166.h5
+    - 01167.h5
+    - 01168.h5
+    - 01169.h5
+    - 01170.h5
+    - 01171.h5
+    - 01172.h5
+    - 01173.h5
+    - 01174.h5
+    - 01175.h5
+    - 01177.h5
+    - 01178.h5
+    - 01180.h5
+    - 01181.h5
+    - 01182.h5
+    - 01184.h5
+    - 01186.h5
+    - 01188.h5
+    - 01191.h5
+    - 01195.h5
+    - 01199.h5
+    - 01201.h5
+    - 01204.h5
+    - 01207.h5
+    - 01208.h5
+    - 01209.h5
+    - 01211.h5
+    - 01212.h5
+    - 01213.h5
+    - 01219.h5
+    - 01221.h5
+    - 01222.h5
+    - 01231.h5
+    - 01232.h5
+    - 01233.h5
+    - 01237.h5
+    - 01243.h5
+    - 01244.h5
+    - 01247.h5
+    - 01250.h5
+    - 01252.h5
+    - 01254.h5
+    - 01255.h5
+    - 01256.h5
+    - 01259.h5
+    - 01260.h5
+    - 01262.h5
+    - 01266.h5
+    - 01269.h5
+    - 01270.h5
+    - 01275.h5
+    - 01276.h5
+    - 01279.h5
+    - 01281.h5
+    - 01283.h5
+    - 01285.h5
+    - 01293.h5
+    - 01295.h5
+    - 01298.h5
+    - 01300.h5
+    - 01301.h5
+    - 01303.h5
+    - 01304.h5
+    - 01315.h5
+    - 01316.h5
+    - 01320.h5
+    - 01323.h5
+    - 01327.h5
+    - 01328.h5
+    - 01330.h5
+    - 01331.h5
+    - 01333.h5
+    - 01340.h5
+    - 01347.h5
+    - 01348.h5
+    - 01349.h5
+    - 01351.h5
+    - 01352.h5
+    - 01353.h5
+    - 01354.h5
+    - 01356.h5
+    - 01357.h5
+    - 01358.h5
+    - 01360.h5
+    - 01361.h5
+    - 01362.h5
+    - 01368.h5
+    - 01375.h5
+    - 01377.h5
+    - 01379.h5
+    - 01380.h5
+    - 01382.h5
+    - 01383.h5
+    - 01384.h5
+    - 01386.h5
+    - 01387.h5
+    - 01388.h5
+    - 01389.h5
+    - 01390.h5
+    - 01391.h5
+    - 01393.h5
+    - 01396.h5
+    - 01400.h5
+    - 01405.h5
+    - 01406.h5
+    - 01414.h5
+    - 01415.h5
+    - 01420.h5
+    - 01421.h5
+    - 01423.h5
+    - 01424.h5
+    - 01428.h5
+    - 01430.h5
+    - 01431.h5
+    - 01434.h5
+    - 01435.h5
+    - 01438.h5
+    - 01440.h5
+    - 01445.h5
+    - 01446.h5
+    - 01448.h5
+    - 01451.h5
+    - 01454.h5
+    - 01456.h5
+    - 01459.h5
+    - 01460.h5
+    - 01462.h5
+    - 01467.h5
+    - 01470.h5
+    - 01471.h5
+    - 01472.h5
+    - 01473.h5
+    - 01478.h5
+    - 01479.h5
+    - 01480.h5
+    - 01481.h5
+    - 01482.h5
+    - 01483.h5
+    - 01485.h5
+    - 01486.h5
+    - 01487.h5
+    - 01488.h5
+    - 01491.h5
+    - 01492.h5
+    - 01496.h5
+    - 01497.h5
+    - 01499.h5
+    - 01508.h5
+    - 01510.h5
+    - 01511.h5
+    - 01514.h5
+    - 01515.h5
+    - 01516.h5
+    - 01519.h5
+    - 01523.h5
+    - 01524.h5
+    - 01528.h5
+    - 01531.h5
+    - 01532.h5
+    - 01533.h5
+    - 01534.h5
+    - 01540.h5
+    - 01542.h5
+    - 01546.h5
+    - 01551.h5
+    - 01553.h5
+    - 01566.h5
+    - 01569.h5
+    - 01574.h5
+    - 01577.h5
+    - 01581.h5
+    - 01582.h5
+    - 01583.h5
+    - 01584.h5
+    - 01602.h5
+    - 01603.h5
+    - 01604.h5
+    - 01606.h5
+    - 01611.h5
+    - 01612.h5
+    - 01613.h5
+    - 01617.h5
+    - 01618.h5
+    - 01627.h5
+    - 01630.h5
+    - 01631.h5
+    - 01633.h5
+    - 01635.h5
+    - 01636.h5
+    - 01637.h5
+    - 01640.h5
+    - 01643.h5
+    - 01644.h5
+    - 01645.h5
+    - 01648.h5
+    - 01650.h5
+    - 01651.h5
+    - 01653.h5
+    - 01658.h5
+    - 01665.h5
+    - 01669.h5
+    - 01671.h5
+    - 01678.h5
+    - 01680.h5
+    - 01681.h5
+    - 01682.h5
+    - 01684.h5
+    - 01687.h5
+    - 01690.h5
+    - 01692.h5
+    - 01693.h5
+    - 01697.h5
+    - 01698.h5
+    - 01700.h5
+    - 01703.h5
+    - 01705.h5
+    - 01706.h5
+    - 01709.h5
+    - 01710.h5
+    - 01713.h5
+    - 01717.h5
+    - 01718.h5
+    - 01719.h5
+    - 01720.h5
+    - 01726.h5
+    - 01727.h5
+    - 01728.h5
+    - 01729.h5
+    - 01730.h5
+    - 01731.h5
+    - 01734.h5
+    - 01738.h5
+    - 01741.h5
+    - 01744.h5
+    - 01745.h5
+    - 01747.h5
+    - 01748.h5
+    - 01755.h5
+    - 01762.h5
+    - 01763.h5
+    - 01768.h5
+    - 01770.h5
+    - 01771.h5
+    - 01775.h5
+    - 01778.h5
+    - 01779.h5
+    - 01783.h5
+    - 01789.h5
+    - 01792.h5
+    - 01795.h5
+    - 01796.h5
+    - 01798.h5
+    - 01802.h5
+    - 01803.h5
+    - 01806.h5
+    - 01812.h5
+    - 01816.h5
+    - 01817.h5
+    - 01818.h5
+    - 01821.h5
+    - 01823.h5
+    - 01825.h5
+    - 01826.h5
+    - 01827.h5
+    - 01828.h5
+    - 01833.h5
+    - 01843.h5
+    - 01849.h5
+    - 01858.h5
+    - 01860.h5
+    - 01862.h5
+    - 01866.h5
+    - 01867.h5
+    - 01868.h5
+    - 01869.h5
+    - 01870.h5
+    - 01874.h5
+    - 01878.h5
+    - 01880.h5
+    - 01882.h5
+    - 01883.h5
+    - 01884.h5
+    - 01885.h5
+    - 01887.h5
+    - 01888.h5
+    - 01889.h5
+    - 01892.h5
+    - 01897.h5
+    - 01900.h5
+    - 01901.h5
+    - 01902.h5
+    - 01905.h5
+    - 01906.h5
+    - 01907.h5
+    - 01908.h5
+    - 01912.h5
+    - 01915.h5
+    - 01921.h5
+    - 01922.h5
+    - 01924.h5
+    - 01925.h5
+    - 01926.h5
+    - 01927.h5
+    - 01930.h5
+    - 01933.h5
+    - 01936.h5
+    - 01943.h5
+    - 01960.h5
+    - 01961.h5
+    - 01962.h5
+    - 01964.h5
+    - 01965.h5
+    - 01966.h5
+    - 01975.h5
+    - 01976.h5
+    - 01977.h5
+    - 01979.h5
+    - 01984.h5
+    - 01987.h5
+    - 01995.h5
+    - 02009.h5
+    - 02011.h5
+    - 02015.h5
+    - 02019.h5
+    - 02022.h5
+    - 02023.h5
+    - 02024.h5
+    - 02025.h5
+    - 02026.h5
+    - 02028.h5
+    - 02029.h5
+    - 02034.h5
+    - 02035.h5
+    - 02038.h5
+    - 02045.h5
+    - 02047.h5
+    - 02051.h5
+    - 02052.h5
+    - 02056.h5
+    - 02058.h5
+    - 02059.h5
+    - 02061.h5
+    - 02064.h5
+    - 02065.h5
+    - 02077.h5
+    - 02084.h5
+    - 02085.h5
+    - 02086.h5
+    - 02087.h5
+    - 02090.h5
+    - 02092.h5
+    - 02093.h5
+    - 02099.h5
+    - 02102.h5
+    - 02105.h5
+    - 02106.h5
+    - 02112.h5
+    - 02113.h5
+    - 02114.h5
+    - 02115.h5
+    - 02118.h5
+    - 02123.h5
+    - 02131.h5
+    - 02136.h5
+    - 02137.h5
+    - 02138.h5
+    - 02140.h5
+    - 02141.h5
+    - 02142.h5
+    - 02152.h5
+    - 02154.h5
+    - 02156.h5
+    - 02159.h5
+    - 02161.h5
+    - 02162.h5
+    - 02168.h5
+    - 02170.h5
+    - 02172.h5
+    - 02173.h5
+    - 02186.h5
+    - 02187.h5
+    - 02193.h5
+    - 02198.h5
+    - 02203.h5
+    - 02204.h5
+    - 02206.h5
+    - 02207.h5
+    - 02212.h5
+    - 02216.h5
+    - 02219.h5
+    - 02220.h5
+    - 02229.h5
+    - 02230.h5
+    - 02232.h5
+    - 02234.h5
+    - 02237.h5
+    - 02241.h5
+    - 02244.h5
+    - 02249.h5
+    - 02250.h5
+    - 02255.h5
+    - 02257.h5
+    - 02264.h5
+    - 02266.h5
+    - 02267.h5
+    - 02270.h5
+    - 02272.h5
+    - 02277.h5
+    - 02278.h5
+    - 02279.h5
+    - 02282.h5
+    - 02293.h5
+    - 02297.h5
+    - 02298.h5
+    - 02300.h5
+    - 02311.h5
+    - 02314.h5
+    - 02319.h5
+    - 02321.h5
+    - 02322.h5
+    - 02324.h5
+    - 02326.h5
+    - 02327.h5
+    - 02328.h5
+    - 02332.h5
+    - 02334.h5
+    - 02337.h5
+    - 02339.h5
+    - 02342.h5
+    - 02343.h5
+    - 02347.h5
+    - 02349.h5
+    - 02350.h5
+    - 02352.h5
+    - 02355.h5
+    - 02358.h5
+    - 02359.h5
+    - 02361.h5
+    - 02362.h5
+    - 02365.h5
+    - 02366.h5
+    - 02367.h5
+    - 02368.h5
+    - 02370.h5
+    - 02371.h5
+    - 02373.h5
+    - 02375.h5
+    - 02379.h5
+    - 02394.h5
+    - 02412.h5
+    - 02414.h5
+    - 02415.h5
+    - 02418.h5
+    - 02420.h5
+    - 02421.h5
+    - 02424.h5
+    - 02426.h5
+    - 02430.h5
+    - 02431.h5
+    - 02432.h5
+    - 02434.h5
+    - 02435.h5
+    - 02436.h5
+    - 02439.h5
+    - 02440.h5
+    - 02441.h5
+    - 02442.h5
+    - 02443.h5
+    - 02445.h5
+    - 02447.h5
+    - 02448.h5
+    - 02452.h5
+    - 02454.h5
+    - 02457.h5
+    - 02458.h5
+    - 02459.h5
+    - 02462.h5
+    - 02465.h5
+    - 02467.h5
+    - 02468.h5
+    - 02469.h5
+    - 02472.h5
+    - 02474.h5
+    - 02478.h5
+    - 02510.h5
+    - 02518.h5
+    - 02520.h5
+    - 02521.h5
+    - 02522.h5
+    - 02524.h5
+    - 02525.h5
+    - 02534.h5
+    - 02535.h5
+    - 02540.h5
+    - 02547.h5
+    - 02550.h5
+    - 02552.h5
+    - 02553.h5
+    - 02554.h5
+    - 02557.h5
+    - 02559.h5
+    - 02566.h5
+    - 02567.h5
+    - 02571.h5
+    - 02573.h5
+    - 02575.h5
+    - 02576.h5
+    - 02578.h5
+    - 02581.h5
+    - 02585.h5
+    - 02587.h5
+    - 02588.h5
+    - 02590.h5
+    - 02595.h5
+    - 02610.h5
+    - 02611.h5
+    - 02613.h5
+    - 02615.h5
+    - 02617.h5
+    - 02619.h5
+    - 02629.h5
+    - 02632.h5
+    - 02634.h5
+    - 02649.h5
+    - 02663.h5
+    - 02666.h5
+    - 02669.h5
+    - 02673.h5
+    - 02681.h5
+    - 02689.h5
+    - 02690.h5
+    - 02700.h5
+    - 02705.h5
+    - 02709.h5
+    - 02713.h5
+    - 02718.h5
+    - 02721.h5
+    - 02722.h5
+    - 02723.h5
+    - 02725.h5
+    - 02729.h5
+    - 02730.h5
+    - 02732.h5
+    - 02737.h5
+    - 02740.h5
+    - 02741.h5
+    - 02749.h5
+    - 02758.h5
+    - 02760.h5
+    - 02761.h5
+    - 02762.h5
+    - 02763.h5
+    - 02764.h5
+    - 02765.h5
+    - 02772.h5
+    - 02773.h5
+    - 02774.h5
+    - 02776.h5
+    - 02780.h5
+    - 02781.h5
+    - 02785.h5
+    - 02797.h5
+    - 02818.h5
+    - 02819.h5
+    - 02827.h5
+    - 02829.h5
+    - 02832.h5
+    - 02837.h5
+    - 02841.h5
+    - 02843.h5
+    - 02846.h5
+    - 02847.h5
+    - 02852.h5
+    - 02854.h5
+    - 02857.h5
+    - 02868.h5
+    - 02872.h5
+    - 02873.h5
+    - 02874.h5
+    - 02876.h5
+    - 02877.h5
+    - 02878.h5
+    - 02879.h5
+    - 02880.h5
+    - 02882.h5
+    - 02883.h5
+    - 02888.h5
+    - 02898.h5
+    - 02902.h5
+    - 02908.h5
+    - 02911.h5
+    - 02919.h5
+    - 02920.h5
+    - 02921.h5
+    - 02922.h5
+    - 02924.h5
+    - 02925.h5
+    - 02928.h5
+    - 02938.h5
+    - 02941.h5
+    - 02944.h5
+    - 02945.h5
+    - 02954.h5
+    - 02955.h5
+    - 02956.h5
+    - 02960.h5
+    - 02961.h5
+    - 02964.h5
+    - 02967.h5
+    - 02977.h5
+    - 02978.h5
+    - 02979.h5
+    - 02980.h5
+    - 02985.h5
+    - 02987.h5
+    - 02988.h5
+    - 02989.h5
+    - 02991.h5
+    - 02997.h5
+    - 02998.h5
+    - 03003.h5
+    - 03004.h5
+    - 03006.h5
+    - 03009.h5
+    - 03012.h5
+    - 03013.h5
+    - 03014.h5
+    - 03023.h5
+    - 03026.h5
+    - 03027.h5
+    - 03037.h5
+    - 03042.h5
+    - 03051.h5
+    - 03057.h5
+    - 03064.h5
+    - 03065.h5
+    - 03079.h5
+    - 03089.h5
+    - 03102.h5
+    - 03107.h5
+    - 03116.h5
+    - 03122.h5
+    - 03125.h5
+    - 03130.h5
+    - 03133.h5
+    - 03134.h5
+    - 03137.h5
+    - 03139.h5
+    - 03160.h5
+    - 03163.h5
+    - 03172.h5
+    - 03174.h5
+    - 03178.h5
+    - 03179.h5
+    - 03180.h5
+    - 03188.h5
+    - 03189.h5
+    - 03190.h5
+    - 03192.h5
+    - 03193.h5
+    - 03197.h5
+    - 03199.h5
+    - 03200.h5
+    - 03205.h5
+    - 03206.h5
+    - 03211.h5
+    - 03218.h5
+    - 03219.h5
+    - 03222.h5
+    - 03225.h5
+    - 03231.h5
+    - 03246.h5
+    - 03248.h5
+    - 03251.h5
+    - 03253.h5
+    - 03255.h5
+    - 03259.h5
+    - 03263.h5
+    - 03265.h5
+    - 03266.h5
+    - 03273.h5
+    - 03275.h5
+    - 03277.h5
+    - 03278.h5
+    - 03282.h5
+    - 03283.h5
+    - 03302.h5
+    - 03303.h5
+    - 03304.h5
+    - 03307.h5
+    - 03314.h5
+    - 03315.h5
+    - 03327.h5
+    - 03328.h5
+    - 03332.h5
+    - 03336.h5
+    - 03340.h5
+    - 03342.h5
+    - 03343.h5
+    - 03348.h5
+    - 03351.h5
+    - 03354.h5
+    - 03358.h5
+    - 03359.h5
+    - 03360.h5
+    - 03367.h5
+    - 03371.h5
+    - 03374.h5
+    - 03375.h5
+    - 03377.h5
+    - 03378.h5
+    - 03379.h5
+    - 03381.h5
+    - 03382.h5
+    - 03384.h5
+    - 03397.h5
+    - 03403.h5
+    - 03406.h5
+    - 03413.h5
+    - 03425.h5
+    - 03431.h5
+    - 03432.h5
+    - 03435.h5
+    - 03442.h5
+    - 03453.h5
+    - 03454.h5
+    - 03456.h5
+    - 03463.h5
+    - 03465.h5
+    - 03466.h5
+    - 03467.h5
+    - 03469.h5
+    - 03473.h5
+    - 03491.h5
+    - 03492.h5
+    - 03495.h5
+    - 03498.h5
+    - 03501.h5
+    - 03502.h5

unigaze/configs/data/gazecapture_train_ds15.yaml ADDED Viewed

	@@ -0,0 +1,1189 @@

+type: datasets.gazecapture.GazeCaptureDataset
+params:
+  data_name: gazecapture_train_224
+  color_type: rgb
+  transform_type: 'basic_imagenet'
+  image_size: 224
+  dataset_path: null
+  sample_rate_use: 15
+  keys_to_use:
+    - 00002.h5
+    - 00003.h5
+    - 00005.h5
+    - 00006.h5
+    - 00024.h5
+    - 00028.h5
+    - 00033.h5
+    - 00034.h5
+    - 00087.h5
+    - 00089.h5
+    - 00097.h5
+    - 00098.h5
+    - 00099.h5
+    - 00102.h5
+    - 00103.h5
+    - 00104.h5
+    - 00114.h5
+    - 00120.h5
+    - 00121.h5
+    - 00122.h5
+    - 00123.h5
+    - 00127.h5
+    - 00128.h5
+    - 00130.h5
+    - 00132.h5
+    - 00137.h5
+    - 00138.h5
+    - 00139.h5
+    - 00140.h5
+    - 00141.h5
+    - 00142.h5
+    - 00143.h5
+    - 00144.h5
+    - 00145.h5
+    - 00146.h5
+    - 00148.h5
+    - 00149.h5
+    - 00150.h5
+    - 00153.h5
+    - 00154.h5
+    - 00156.h5
+    - 00162.h5
+    - 00164.h5
+    - 00165.h5
+    - 00173.h5
+    - 00179.h5
+    - 00191.h5
+    - 00194.h5
+    - 00200.h5
+    - 00202.h5
+    - 00208.h5
+    - 00209.h5
+    - 00210.h5
+    - 00211.h5
+    - 00212.h5
+    - 00214.h5
+    - 00218.h5
+    - 00221.h5
+    - 00224.h5
+    - 00225.h5
+    - 00226.h5
+    - 00227.h5
+    - 00228.h5
+    - 00232.h5
+    - 00234.h5
+    - 00236.h5
+    - 00237.h5
+    - 00238.h5
+    - 00239.h5
+    - 00240.h5
+    - 00241.h5
+    - 00243.h5
+    - 00245.h5
+    - 00247.h5
+    - 00249.h5
+    - 00268.h5
+    - 00269.h5
+    - 00273.h5
+    - 00274.h5
+    - 00285.h5
+    - 00288.h5
+    - 00289.h5
+    - 00295.h5
+    - 00296.h5
+    - 00299.h5
+    - 00300.h5
+    - 00303.h5
+    - 00304.h5
+    - 00305.h5
+    - 00307.h5
+    - 00309.h5
+    - 00310.h5
+    - 00311.h5
+    - 00312.h5
+    - 00317.h5
+    - 00324.h5
+    - 00325.h5
+    - 00326.h5
+    - 00331.h5
+    - 00332.h5
+    - 00339.h5
+    - 00342.h5
+    - 00351.h5
+    - 00354.h5
+    - 00355.h5
+    - 00356.h5
+    - 00357.h5
+    - 00358.h5
+    - 00359.h5
+    - 00363.h5
+    - 00376.h5
+    - 00377.h5
+    - 00459.h5
+    - 00465.h5
+    - 00466.h5
+    - 00467.h5
+    - 00469.h5
+    - 00472.h5
+    - 00473.h5
+    - 00475.h5
+    - 00477.h5
+    - 00480.h5
+    - 00481.h5
+    - 00487.h5
+    - 00488.h5
+    - 00491.h5
+    - 00492.h5
+    - 00493.h5
+    - 00494.h5
+    - 00495.h5
+    - 00496.h5
+    - 00499.h5
+    - 00501.h5
+    - 00503.h5
+    - 00505.h5
+    - 00510.h5
+    - 00512.h5
+    - 00513.h5
+    - 00514.h5
+    - 00518.h5
+    - 00519.h5
+    - 00520.h5
+    - 00522.h5
+    - 00525.h5
+    - 00531.h5
+    - 00533.h5
+    - 00534.h5
+    - 00535.h5
+    - 00539.h5
+    - 00540.h5
+    - 00542.h5
+    - 00544.h5
+    - 00545.h5
+    - 00548.h5
+    - 00550.h5
+    - 00553.h5
+    - 00554.h5
+    - 00555.h5
+    - 00560.h5
+    - 00562.h5
+    - 00565.h5
+    - 00566.h5
+    - 00569.h5
+    - 00572.h5
+    - 00574.h5
+    - 00575.h5
+    - 00578.h5
+    - 00581.h5
+    - 00584.h5
+    - 00588.h5
+    - 00590.h5
+    - 00599.h5
+    - 00600.h5
+    - 00601.h5
+    - 00602.h5
+    - 00605.h5
+    - 00606.h5
+    - 00607.h5
+    - 00610.h5
+    - 00613.h5
+    - 00617.h5
+    - 00621.h5
+    - 00622.h5
+    - 00623.h5
+    - 00624.h5
+    - 00626.h5
+    - 00627.h5
+    - 00632.h5
+    - 00633.h5
+    - 00634.h5
+    - 00636.h5
+    - 00638.h5
+    - 00641.h5
+    - 00642.h5
+    - 00643.h5
+    - 00644.h5
+    - 00645.h5
+    - 00649.h5
+    - 00650.h5
+    - 00658.h5
+    - 00661.h5
+    - 00663.h5
+    - 00666.h5
+    - 00667.h5
+    - 00668.h5
+    - 00669.h5
+    - 00670.h5
+    - 00672.h5
+    - 00675.h5
+    - 00676.h5
+    - 00677.h5
+    - 00678.h5
+    - 00679.h5
+    - 00682.h5
+    - 00683.h5
+    - 00687.h5
+    - 00688.h5
+    - 00690.h5
+    - 00691.h5
+    - 00693.h5
+    - 00694.h5
+    - 00695.h5
+    - 00699.h5
+    - 00704.h5
+    - 00706.h5
+    - 00707.h5
+    - 00710.h5
+    - 00711.h5
+    - 00712.h5
+    - 00714.h5
+    - 00716.h5
+    - 00718.h5
+    - 00719.h5
+    - 00722.h5
+    - 00728.h5
+    - 00729.h5
+    - 00730.h5
+    - 00731.h5
+    - 00732.h5
+    - 00733.h5
+    - 00737.h5
+    - 00742.h5
+    - 00743.h5
+    - 00745.h5
+    - 00747.h5
+    - 00749.h5
+    - 00750.h5
+    - 00752.h5
+    - 00753.h5
+    - 00755.h5
+    - 00756.h5
+    - 00757.h5
+    - 00764.h5
+    - 00765.h5
+    - 00767.h5
+    - 00771.h5
+    - 00772.h5
+    - 00773.h5
+    - 00774.h5
+    - 00775.h5
+    - 00789.h5
+    - 00790.h5
+    - 00791.h5
+    - 00795.h5
+    - 00798.h5
+    - 00801.h5
+    - 00802.h5
+    - 00804.h5
+    - 00806.h5
+    - 00807.h5
+    - 00810.h5
+    - 00811.h5
+    - 00812.h5
+    - 00814.h5
+    - 00818.h5
+    - 00819.h5
+    - 00820.h5
+    - 00821.h5
+    - 00823.h5
+    - 00825.h5
+    - 00827.h5
+    - 00831.h5
+    - 00832.h5
+    - 00833.h5
+    - 00835.h5
+    - 00837.h5
+    - 00840.h5
+    - 00841.h5
+    - 00842.h5
+    - 00849.h5
+    - 00850.h5
+    - 00851.h5
+    - 00852.h5
+    - 00853.h5
+    - 00855.h5
+    - 00859.h5
+    - 00864.h5
+    - 00865.h5
+    - 00869.h5
+    - 00872.h5
+    - 00873.h5
+    - 00874.h5
+    - 00875.h5
+    - 00878.h5
+    - 00881.h5
+    - 00882.h5
+    - 00886.h5
+    - 00888.h5
+    - 00889.h5
+    - 00891.h5
+    - 00892.h5
+    - 00894.h5
+    - 00896.h5
+    - 00897.h5
+    - 00898.h5
+    - 00899.h5
+    - 00900.h5
+    - 00904.h5
+    - 00905.h5
+    - 00907.h5
+    - 00911.h5
+    - 00912.h5
+    - 00914.h5
+    - 00915.h5
+    - 00923.h5
+    - 00924.h5
+    - 00927.h5
+    - 00931.h5
+    - 00933.h5
+    - 00934.h5
+    - 00938.h5
+    - 00944.h5
+    - 00945.h5
+    - 00947.h5
+    - 00948.h5
+    - 00956.h5
+    - 00961.h5
+    - 00963.h5
+    - 00969.h5
+    - 00971.h5
+    - 00974.h5
+    - 00980.h5
+    - 00981.h5
+    - 00982.h5
+    - 00983.h5
+    - 00984.h5
+    - 00986.h5
+    - 00989.h5
+    - 00991.h5
+    - 00992.h5
+    - 00997.h5
+    - 00999.h5
+    - 01000.h5
+    - 01002.h5
+    - 01003.h5
+    - 01009.h5
+    - 01010.h5
+    - 01012.h5
+    - 01015.h5
+    - 01018.h5
+    - 01019.h5
+    - 01020.h5
+    - 01021.h5
+    - 01022.h5
+    - 01024.h5
+    - 01025.h5
+    - 01031.h5
+    - 01032.h5
+    - 01034.h5
+    - 01035.h5
+    - 01038.h5
+    - 01039.h5
+    - 01042.h5
+    - 01044.h5
+    - 01045.h5
+    - 01046.h5
+    - 01050.h5
+    - 01052.h5
+    - 01054.h5
+    - 01055.h5
+    - 01056.h5
+    - 01057.h5
+    - 01058.h5
+    - 01059.h5
+    - 01060.h5
+    - 01062.h5
+    - 01063.h5
+    - 01064.h5
+    - 01065.h5
+    - 01069.h5
+    - 01070.h5
+    - 01073.h5
+    - 01075.h5
+    - 01076.h5
+    - 01077.h5
+    - 01080.h5
+    - 01081.h5
+    - 01082.h5
+    - 01083.h5
+    - 01084.h5
+    - 01085.h5
+    - 01086.h5
+    - 01087.h5
+    - 01088.h5
+    - 01089.h5
+    - 01090.h5
+    - 01092.h5
+    - 01093.h5
+    - 01095.h5
+    - 01100.h5
+    - 01102.h5
+    - 01104.h5
+    - 01105.h5
+    - 01106.h5
+    - 01107.h5
+    - 01110.h5
+    - 01118.h5
+    - 01120.h5
+    - 01121.h5
+    - 01123.h5
+    - 01127.h5
+    - 01128.h5
+    - 01129.h5
+    - 01135.h5
+    - 01138.h5
+    - 01139.h5
+    - 01143.h5
+    - 01145.h5
+    - 01146.h5
+    - 01147.h5
+    - 01149.h5
+    - 01151.h5
+    - 01156.h5
+    - 01157.h5
+    - 01158.h5
+    - 01161.h5
+    - 01162.h5
+    - 01163.h5
+    - 01164.h5
+    - 01165.h5
+    - 01166.h5
+    - 01167.h5
+    - 01168.h5
+    - 01169.h5
+    - 01170.h5
+    - 01171.h5
+    - 01172.h5
+    - 01173.h5
+    - 01174.h5
+    - 01175.h5
+    - 01177.h5
+    - 01178.h5
+    - 01180.h5
+    - 01181.h5
+    - 01182.h5
+    - 01184.h5
+    - 01186.h5
+    - 01188.h5
+    - 01191.h5
+    - 01195.h5
+    - 01199.h5
+    - 01201.h5
+    - 01204.h5
+    - 01207.h5
+    - 01208.h5
+    - 01209.h5
+    - 01211.h5
+    - 01212.h5
+    - 01213.h5
+    - 01219.h5
+    - 01221.h5
+    - 01222.h5
+    - 01231.h5
+    - 01232.h5
+    - 01233.h5
+    - 01237.h5
+    - 01243.h5
+    - 01244.h5
+    - 01247.h5
+    - 01250.h5
+    - 01252.h5
+    - 01254.h5
+    - 01255.h5
+    - 01256.h5
+    - 01259.h5
+    - 01260.h5
+    - 01262.h5
+    - 01266.h5
+    - 01269.h5
+    - 01270.h5
+    - 01275.h5
+    - 01276.h5
+    - 01279.h5
+    - 01281.h5
+    - 01283.h5
+    - 01285.h5
+    - 01293.h5
+    - 01295.h5
+    - 01298.h5
+    - 01300.h5
+    - 01301.h5
+    - 01303.h5
+    - 01304.h5
+    - 01315.h5
+    - 01316.h5
+    - 01320.h5
+    - 01323.h5
+    - 01327.h5
+    - 01328.h5
+    - 01330.h5
+    - 01331.h5
+    - 01333.h5
+    - 01340.h5
+    - 01347.h5
+    - 01348.h5
+    - 01349.h5
+    - 01351.h5
+    - 01352.h5
+    - 01353.h5
+    - 01354.h5
+    - 01356.h5
+    - 01357.h5
+    - 01358.h5
+    - 01360.h5
+    - 01361.h5
+    - 01362.h5
+    - 01368.h5
+    - 01375.h5
+    - 01377.h5
+    - 01379.h5
+    - 01380.h5
+    - 01382.h5
+    - 01383.h5
+    - 01384.h5
+    - 01386.h5
+    - 01387.h5
+    - 01388.h5
+    - 01389.h5
+    - 01390.h5
+    - 01391.h5
+    - 01393.h5
+    - 01396.h5
+    - 01400.h5
+    - 01405.h5
+    - 01406.h5
+    - 01414.h5
+    - 01415.h5
+    - 01420.h5
+    - 01421.h5
+    - 01423.h5
+    - 01424.h5
+    - 01428.h5
+    - 01430.h5
+    - 01431.h5
+    - 01434.h5
+    - 01435.h5
+    - 01438.h5
+    - 01440.h5
+    - 01445.h5
+    - 01446.h5
+    - 01448.h5
+    - 01451.h5
+    - 01454.h5
+    - 01456.h5
+    - 01459.h5
+    - 01460.h5
+    - 01462.h5
+    - 01467.h5
+    - 01470.h5
+    - 01471.h5
+    - 01472.h5
+    - 01473.h5
+    - 01478.h5
+    - 01479.h5
+    - 01480.h5
+    - 01481.h5
+    - 01482.h5
+    - 01483.h5
+    - 01485.h5
+    - 01486.h5
+    - 01487.h5
+    - 01488.h5
+    - 01491.h5
+    - 01492.h5
+    - 01496.h5
+    - 01497.h5
+    - 01499.h5
+    - 01508.h5
+    - 01510.h5
+    - 01511.h5
+    - 01514.h5
+    - 01515.h5
+    - 01516.h5
+    - 01519.h5
+    - 01523.h5
+    - 01524.h5
+    - 01528.h5
+    - 01531.h5
+    - 01532.h5
+    - 01533.h5
+    - 01534.h5
+    - 01540.h5
+    - 01542.h5
+    - 01546.h5
+    - 01551.h5
+    - 01553.h5
+    - 01566.h5
+    - 01569.h5
+    - 01574.h5
+    - 01577.h5
+    - 01581.h5
+    - 01582.h5
+    - 01583.h5
+    - 01584.h5
+    - 01602.h5
+    - 01603.h5
+    - 01604.h5
+    - 01606.h5
+    - 01611.h5
+    - 01612.h5
+    - 01613.h5
+    - 01617.h5
+    - 01618.h5
+    - 01627.h5
+    - 01630.h5
+    - 01631.h5
+    - 01633.h5
+    - 01635.h5
+    - 01636.h5
+    - 01637.h5
+    - 01640.h5
+    - 01643.h5
+    - 01644.h5
+    - 01645.h5
+    - 01648.h5
+    - 01650.h5
+    - 01651.h5
+    - 01653.h5
+    - 01658.h5
+    - 01665.h5
+    - 01669.h5
+    - 01671.h5
+    - 01678.h5
+    - 01680.h5
+    - 01681.h5
+    - 01682.h5
+    - 01684.h5
+    - 01687.h5
+    - 01690.h5
+    - 01692.h5
+    - 01693.h5
+    - 01697.h5
+    - 01698.h5
+    - 01700.h5
+    - 01703.h5
+    - 01705.h5
+    - 01706.h5
+    - 01709.h5
+    - 01710.h5
+    - 01713.h5
+    - 01717.h5
+    - 01718.h5
+    - 01719.h5
+    - 01720.h5
+    - 01726.h5
+    - 01727.h5
+    - 01728.h5
+    - 01729.h5
+    - 01730.h5
+    - 01731.h5
+    - 01734.h5
+    - 01738.h5
+    - 01741.h5
+    - 01744.h5
+    - 01745.h5
+    - 01747.h5
+    - 01748.h5
+    - 01755.h5
+    - 01762.h5
+    - 01763.h5
+    - 01768.h5
+    - 01770.h5
+    - 01771.h5
+    - 01775.h5
+    - 01778.h5
+    - 01779.h5
+    - 01783.h5
+    - 01789.h5
+    - 01792.h5
+    - 01795.h5
+    - 01796.h5
+    - 01798.h5
+    - 01802.h5
+    - 01803.h5
+    - 01806.h5
+    - 01812.h5
+    - 01816.h5
+    - 01817.h5
+    - 01818.h5
+    - 01821.h5
+    - 01823.h5
+    - 01825.h5
+    - 01826.h5
+    - 01827.h5
+    - 01828.h5
+    - 01833.h5
+    - 01843.h5
+    - 01849.h5
+    - 01858.h5
+    - 01860.h5
+    - 01862.h5
+    - 01866.h5
+    - 01867.h5
+    - 01868.h5
+    - 01869.h5
+    - 01870.h5
+    - 01874.h5
+    - 01878.h5
+    - 01880.h5
+    - 01882.h5
+    - 01883.h5
+    - 01884.h5
+    - 01885.h5
+    - 01887.h5
+    - 01888.h5
+    - 01889.h5
+    - 01892.h5
+    - 01897.h5
+    - 01900.h5
+    - 01901.h5
+    - 01902.h5
+    - 01905.h5
+    - 01906.h5
+    - 01907.h5
+    - 01908.h5
+    - 01912.h5
+    - 01915.h5
+    - 01921.h5
+    - 01922.h5
+    - 01924.h5
+    - 01925.h5
+    - 01926.h5
+    - 01927.h5
+    - 01930.h5
+    - 01933.h5
+    - 01936.h5
+    - 01943.h5
+    - 01960.h5
+    - 01961.h5
+    - 01962.h5
+    - 01964.h5
+    - 01965.h5
+    - 01966.h5
+    - 01975.h5
+    - 01976.h5
+    - 01977.h5
+    - 01979.h5
+    - 01984.h5
+    - 01987.h5
+    - 01995.h5
+    - 02009.h5
+    - 02011.h5
+    - 02015.h5
+    - 02019.h5
+    - 02022.h5
+    - 02023.h5
+    - 02024.h5
+    - 02025.h5
+    - 02026.h5
+    - 02028.h5
+    - 02029.h5
+    - 02034.h5
+    - 02035.h5
+    - 02038.h5
+    - 02045.h5
+    - 02047.h5
+    - 02051.h5
+    - 02052.h5
+    - 02056.h5
+    - 02058.h5
+    - 02059.h5
+    - 02061.h5
+    - 02064.h5
+    - 02065.h5
+    - 02077.h5
+    - 02084.h5
+    - 02085.h5
+    - 02086.h5
+    - 02087.h5
+    - 02090.h5
+    - 02092.h5
+    - 02093.h5
+    - 02099.h5
+    - 02102.h5
+    - 02105.h5
+    - 02106.h5
+    - 02112.h5
+    - 02113.h5
+    - 02114.h5
+    - 02115.h5
+    - 02118.h5
+    - 02123.h5
+    - 02131.h5
+    - 02136.h5
+    - 02137.h5
+    - 02138.h5
+    - 02140.h5
+    - 02141.h5
+    - 02142.h5
+    - 02152.h5
+    - 02154.h5
+    - 02156.h5
+    - 02159.h5
+    - 02161.h5
+    - 02162.h5
+    - 02168.h5
+    - 02170.h5
+    - 02172.h5
+    - 02173.h5
+    - 02186.h5
+    - 02187.h5
+    - 02193.h5
+    - 02198.h5
+    - 02203.h5
+    - 02204.h5
+    - 02206.h5
+    - 02207.h5
+    - 02212.h5
+    - 02216.h5
+    - 02219.h5
+    - 02220.h5
+    - 02229.h5
+    - 02230.h5
+    - 02232.h5
+    - 02234.h5
+    - 02237.h5
+    - 02241.h5
+    - 02244.h5
+    - 02249.h5
+    - 02250.h5
+    - 02255.h5
+    - 02257.h5
+    - 02264.h5
+    - 02266.h5
+    - 02267.h5
+    - 02270.h5
+    - 02272.h5
+    - 02277.h5
+    - 02278.h5
+    - 02279.h5
+    - 02282.h5
+    - 02293.h5
+    - 02297.h5
+    - 02298.h5
+    - 02300.h5
+    - 02311.h5
+    - 02314.h5
+    - 02319.h5
+    - 02321.h5
+    - 02322.h5
+    - 02324.h5
+    - 02326.h5
+    - 02327.h5
+    - 02328.h5
+    - 02332.h5
+    - 02334.h5
+    - 02337.h5
+    - 02339.h5
+    - 02342.h5
+    - 02343.h5
+    - 02347.h5
+    - 02349.h5
+    - 02350.h5
+    - 02352.h5
+    - 02355.h5
+    - 02358.h5
+    - 02359.h5
+    - 02361.h5
+    - 02362.h5
+    - 02365.h5
+    - 02366.h5
+    - 02367.h5
+    - 02368.h5
+    - 02370.h5
+    - 02371.h5
+    - 02373.h5
+    - 02375.h5
+    - 02379.h5
+    - 02394.h5
+    - 02412.h5
+    - 02414.h5
+    - 02415.h5
+    - 02418.h5
+    - 02420.h5
+    - 02421.h5
+    - 02424.h5
+    - 02426.h5
+    - 02430.h5
+    - 02431.h5
+    - 02432.h5
+    - 02434.h5
+    - 02435.h5
+    - 02436.h5
+    - 02439.h5
+    - 02440.h5
+    - 02441.h5
+    - 02442.h5
+    - 02443.h5
+    - 02445.h5
+    - 02447.h5
+    - 02448.h5
+    - 02452.h5
+    - 02454.h5
+    - 02457.h5
+    - 02458.h5
+    - 02459.h5
+    - 02462.h5
+    - 02465.h5
+    - 02467.h5
+    - 02468.h5
+    - 02469.h5
+    - 02472.h5
+    - 02474.h5
+    - 02478.h5
+    - 02510.h5
+    - 02518.h5
+    - 02520.h5
+    - 02521.h5
+    - 02522.h5
+    - 02524.h5
+    - 02525.h5
+    - 02534.h5
+    - 02535.h5
+    - 02540.h5
+    - 02547.h5
+    - 02550.h5
+    - 02552.h5
+    - 02553.h5
+    - 02554.h5
+    - 02557.h5
+    - 02559.h5
+    - 02566.h5
+    - 02567.h5
+    - 02571.h5
+    - 02573.h5
+    - 02575.h5
+    - 02576.h5
+    - 02578.h5
+    - 02581.h5
+    - 02585.h5
+    - 02587.h5
+    - 02588.h5
+    - 02590.h5
+    - 02595.h5
+    - 02610.h5
+    - 02611.h5
+    - 02613.h5
+    - 02615.h5
+    - 02617.h5
+    - 02619.h5
+    - 02629.h5
+    - 02632.h5
+    - 02634.h5
+    - 02649.h5
+    - 02663.h5
+    - 02666.h5
+    - 02669.h5
+    - 02673.h5
+    - 02681.h5
+    - 02689.h5
+    - 02690.h5
+    - 02700.h5
+    - 02705.h5
+    - 02709.h5
+    - 02713.h5
+    - 02718.h5
+    - 02721.h5
+    - 02722.h5
+    - 02723.h5
+    - 02725.h5
+    - 02729.h5
+    - 02730.h5
+    - 02732.h5
+    - 02737.h5
+    - 02740.h5
+    - 02741.h5
+    - 02749.h5
+    - 02758.h5
+    - 02760.h5
+    - 02761.h5
+    - 02762.h5
+    - 02763.h5
+    - 02764.h5
+    - 02765.h5
+    - 02772.h5
+    - 02773.h5
+    - 02774.h5
+    - 02776.h5
+    - 02780.h5
+    - 02781.h5
+    - 02785.h5
+    - 02797.h5
+    - 02818.h5
+    - 02819.h5
+    - 02827.h5
+    - 02829.h5
+    - 02832.h5
+    - 02837.h5
+    - 02841.h5
+    - 02843.h5
+    - 02846.h5
+    - 02847.h5
+    - 02852.h5
+    - 02854.h5
+    - 02857.h5
+    - 02868.h5
+    - 02872.h5
+    - 02873.h5
+    - 02874.h5
+    - 02876.h5
+    - 02877.h5
+    - 02878.h5
+    - 02879.h5
+    - 02880.h5
+    - 02882.h5
+    - 02883.h5
+    - 02888.h5
+    - 02898.h5
+    - 02902.h5
+    - 02908.h5
+    - 02911.h5
+    - 02919.h5
+    - 02920.h5
+    - 02921.h5
+    - 02922.h5
+    - 02924.h5
+    - 02925.h5
+    - 02928.h5
+    - 02938.h5
+    - 02941.h5
+    - 02944.h5
+    - 02945.h5
+    - 02954.h5
+    - 02955.h5
+    - 02956.h5
+    - 02960.h5
+    - 02961.h5
+    - 02964.h5
+    - 02967.h5
+    - 02977.h5
+    - 02978.h5
+    - 02979.h5
+    - 02980.h5
+    - 02985.h5
+    - 02987.h5
+    - 02988.h5
+    - 02989.h5
+    - 02991.h5
+    - 02997.h5
+    - 02998.h5
+    - 03003.h5
+    - 03004.h5
+    - 03006.h5
+    - 03009.h5
+    - 03012.h5
+    - 03013.h5
+    - 03014.h5
+    - 03023.h5
+    - 03026.h5
+    - 03027.h5
+    - 03037.h5
+    - 03042.h5
+    - 03051.h5
+    - 03057.h5
+    - 03064.h5
+    - 03065.h5
+    - 03079.h5
+    - 03089.h5
+    - 03102.h5
+    - 03107.h5
+    - 03116.h5
+    - 03122.h5
+    - 03125.h5
+    - 03130.h5
+    - 03133.h5
+    - 03134.h5
+    - 03137.h5
+    - 03139.h5
+    - 03160.h5
+    - 03163.h5
+    - 03172.h5
+    - 03174.h5
+    - 03178.h5
+    - 03179.h5
+    - 03180.h5
+    - 03188.h5
+    - 03189.h5
+    - 03190.h5
+    - 03192.h5
+    - 03193.h5
+    - 03197.h5
+    - 03199.h5
+    - 03200.h5
+    - 03205.h5
+    - 03206.h5
+    - 03211.h5
+    - 03218.h5
+    - 03219.h5
+    - 03222.h5
+    - 03225.h5
+    - 03231.h5
+    - 03246.h5
+    - 03248.h5
+    - 03251.h5
+    - 03253.h5
+    - 03255.h5
+    - 03259.h5
+    - 03263.h5
+    - 03265.h5
+    - 03266.h5
+    - 03273.h5
+    - 03275.h5
+    - 03277.h5
+    - 03278.h5
+    - 03282.h5
+    - 03283.h5
+    - 03302.h5
+    - 03303.h5
+    - 03304.h5
+    - 03307.h5
+    - 03314.h5
+    - 03315.h5
+    - 03327.h5
+    - 03328.h5
+    - 03332.h5
+    - 03336.h5
+    - 03340.h5
+    - 03342.h5
+    - 03343.h5
+    - 03348.h5
+    - 03351.h5
+    - 03354.h5
+    - 03358.h5
+    - 03359.h5
+    - 03360.h5
+    - 03367.h5
+    - 03371.h5
+    - 03374.h5
+    - 03375.h5
+    - 03377.h5
+    - 03378.h5
+    - 03379.h5
+    - 03381.h5
+    - 03382.h5
+    - 03384.h5
+    - 03397.h5
+    - 03403.h5
+    - 03406.h5
+    - 03413.h5
+    - 03425.h5
+    - 03431.h5
+    - 03432.h5
+    - 03435.h5
+    - 03442.h5
+    - 03453.h5
+    - 03454.h5
+    - 03456.h5
+    - 03463.h5
+    - 03465.h5
+    - 03466.h5
+    - 03467.h5
+    - 03469.h5
+    - 03473.h5
+    - 03491.h5
+    - 03492.h5
+    - 03495.h5
+    - 03498.h5
+    - 03501.h5
+    - 03502.h5

unigaze/configs/data/mpiigaze.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+type: datasets.mpiigaze.MPIIGazeDataset
+params:
+  data_name: mpii
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  image_size: 224
+  keys_to_use:
+    - p00.h5
+    - p01.h5
+    - p02.h5
+    - p03.h5
+    - p04.h5
+    - p05.h5
+    - p06.h5
+    - p07.h5
+    - p08.h5
+    - p09.h5
+    - p10.h5
+    - p11.h5
+    - p12.h5
+    - p13.h5
+    - p14.h5

unigaze/configs/data/mpiigaze_test.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+type: datasets.mpiigaze.MPIIGazeDataset
+params:
+  data_name: mpii
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  image_size: 224
+  keys_to_use:
+    # - p00.h5
+    # - p01.h5
+    # - p02.h5
+    # - p03.h5
+    # - p04.h5
+    # - p05.h5
+    # - p06.h5
+    # - p07.h5
+    # - p08.h5
+    # - p09.h5
+    - p10.h5
+    - p11.h5
+    - p12.h5
+    - p13.h5
+    - p14.h5

unigaze/configs/data/mpiigaze_train.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+type: datasets.mpiigaze.MPIIGazeDataset
+params:
+  data_name: mpii
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  image_size: 224
+  keys_to_use:
+    - p00.h5
+    - p01.h5
+    - p02.h5
+    - p03.h5
+    - p04.h5
+    - p05.h5
+    - p06.h5
+    - p07.h5
+    - p08.h5
+    - p09.h5
+    # - p10.h5
+    # - p11.h5
+    # - p12.h5
+    # - p13.h5
+    # - p14.h5

unigaze/configs/data/xgaze_0_60sub.yaml ADDED Viewed

	@@ -0,0 +1,76 @@

+type: datasets.xgaze.XGazeDataset
+params:
+  data_name: xgaze_v2_224
+  images_per_frame: 18
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  frame_tag: 'all'
+  image_size: 224
+  keys_to_use:
+    - subject0000.h5
+    - subject0003.h5
+    - subject0004.h5
+    - subject0005.h5
+    - subject0006.h5
+    - subject0007.h5
+    - subject0008.h5
+    - subject0009.h5
+    - subject0010.h5
+    - subject0013.h5
+    - subject0014.h5
+    - subject0015.h5
+    - subject0016.h5
+    - subject0018.h5
+    - subject0019.h5
+    - subject0021.h5
+    - subject0024.h5
+    - subject0026.h5
+    - subject0027.h5
+    - subject0028.h5
+    - subject0029.h5
+    - subject0030.h5
+    - subject0031.h5
+    - subject0032.h5
+    - subject0033.h5
+    - subject0035.h5
+    - subject0036.h5
+    - subject0038.h5
+    - subject0039.h5
+    - subject0040.h5
+    - subject0041.h5
+    - subject0043.h5
+    - subject0044.h5
+    - subject0045.h5
+    - subject0046.h5
+    - subject0048.h5
+    - subject0050.h5
+    - subject0051.h5
+    - subject0052.h5
+    - subject0055.h5
+    - subject0056.h5
+    - subject0057.h5
+    - subject0058.h5
+    - subject0059.h5
+    - subject0060.h5
+    - subject0061.h5
+    - subject0062.h5
+    - subject0063.h5
+    - subject0065.h5
+    - subject0066.h5
+    - subject0067.h5
+    - subject0069.h5
+    - subject0072.h5
+    - subject0073.h5
+    - subject0075.h5
+    - subject0076.h5
+    - subject0078.h5
+    - subject0079.h5
+    - subject0080.h5
+    - subject0081.h5

unigaze/configs/data/xgaze_0_60sub_d3.yaml ADDED Viewed

	@@ -0,0 +1,80 @@

+type: datasets.xgaze.XGazeDataset
+params:
+  data_name: xgaze_v2_224
+  images_per_frame: 18
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  frame_tag: 'all'
+  camera_random: 3
+  image_size: 224
+  keys_to_use:
+    - subject0000.h5
+    - subject0003.h5
+    - subject0004.h5
+    - subject0005.h5
+    - subject0006.h5
+    - subject0007.h5
+    - subject0008.h5
+    - subject0009.h5
+    - subject0010.h5
+    - subject0013.h5
+    - subject0014.h5
+    - subject0015.h5
+    - subject0016.h5
+    - subject0018.h5
+    - subject0019.h5
+    - subject0021.h5
+    - subject0024.h5
+    - subject0026.h5
+    - subject0027.h5
+    - subject0028.h5
+    - subject0029.h5
+    - subject0030.h5
+    - subject0031.h5
+    - subject0032.h5
+    - subject0033.h5
+    - subject0035.h5
+    - subject0036.h5
+    - subject0038.h5
+    - subject0039.h5
+    - subject0040.h5
+    - subject0041.h5
+    - subject0043.h5
+    - subject0044.h5
+    - subject0045.h5
+    - subject0046.h5
+    - subject0048.h5
+    - subject0050.h5
+    - subject0051.h5
+    - subject0052.h5
+    - subject0055.h5
+    - subject0056.h5
+    - subject0057.h5
+    - subject0058.h5
+    - subject0059.h5
+    - subject0060.h5
+    - subject0061.h5
+    - subject0062.h5
+    - subject0063.h5
+    - subject0065.h5
+    - subject0066.h5
+    - subject0067.h5
+    - subject0069.h5
+    - subject0072.h5
+    - subject0073.h5
+    - subject0075.h5
+    - subject0076.h5
+    - subject0078.h5
+    - subject0079.h5
+    - subject0080.h5
+    - subject0081.h5

unigaze/configs/data/xgaze_0_80sub.yaml ADDED Viewed

	@@ -0,0 +1,97 @@

+type: datasets.xgaze.XGazeDataset
+params:
+  data_name: xgaze_v2_224
+  images_per_frame: 18
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  frame_tag: 'all'
+  image_size: 224
+  keys_to_use:
+    - subject0000.h5
+    - subject0003.h5
+    - subject0004.h5
+    - subject0005.h5
+    - subject0006.h5
+    - subject0007.h5
+    - subject0008.h5
+    - subject0009.h5
+    - subject0010.h5
+    - subject0013.h5
+    - subject0014.h5
+    - subject0015.h5
+    - subject0016.h5
+    - subject0018.h5
+    - subject0019.h5
+    - subject0021.h5
+    - subject0024.h5
+    - subject0026.h5
+    - subject0027.h5
+    - subject0028.h5
+    - subject0029.h5
+    - subject0030.h5
+    - subject0031.h5
+    - subject0032.h5
+    - subject0033.h5
+    - subject0035.h5
+    - subject0036.h5
+    - subject0038.h5
+    - subject0039.h5
+    - subject0040.h5
+    - subject0041.h5
+    - subject0043.h5
+    - subject0044.h5
+    - subject0045.h5
+    - subject0046.h5
+    - subject0048.h5
+    - subject0050.h5
+    - subject0051.h5
+    - subject0052.h5
+    - subject0055.h5
+    - subject0056.h5
+    - subject0057.h5
+    - subject0058.h5
+    - subject0059.h5
+    - subject0060.h5
+    - subject0061.h5
+    - subject0062.h5
+    - subject0063.h5
+    - subject0065.h5
+    - subject0066.h5
+    - subject0067.h5
+    - subject0069.h5
+    - subject0072.h5
+    - subject0073.h5
+    - subject0075.h5
+    - subject0076.h5
+    - subject0078.h5
+    - subject0079.h5
+    - subject0080.h5
+    - subject0081.h5
+    - subject0083.h5
+    - subject0084.h5
+    - subject0085.h5
+    - subject0088.h5
+    - subject0090.h5
+    - subject0092.h5
+    - subject0095.h5
+    - subject0098.h5
+    - subject0099.h5
+    - subject0100.h5
+    - subject0101.h5
+    - subject0102.h5
+    - subject0103.h5
+    - subject0104.h5
+    - subject0105.h5
+    - subject0106.h5
+    - subject0107.h5
+    - subject0108.h5
+    - subject0109.h5
+    - subject0111.h5

unigaze/configs/data/xgaze_0_80sub_d3.yaml ADDED Viewed

	@@ -0,0 +1,97 @@

+type: datasets.xgaze.XGazeDataset
+params:
+  data_name: xgaze_v2_224
+  color_type: bgr
+  images_per_frame: 18
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  frame_tag: 'all'
+  camera_random: 3
+  image_size: 224
+  keys_to_use:
+    - subject0000.h5
+    - subject0003.h5
+    - subject0004.h5
+    - subject0005.h5
+    - subject0006.h5
+    - subject0007.h5
+    - subject0008.h5
+    - subject0009.h5
+    - subject0010.h5
+    - subject0013.h5
+    - subject0014.h5
+    - subject0015.h5
+    - subject0016.h5
+    - subject0018.h5
+    - subject0019.h5
+    - subject0021.h5
+    - subject0024.h5
+    - subject0026.h5
+    - subject0027.h5
+    - subject0028.h5
+    - subject0029.h5
+    - subject0030.h5
+    - subject0031.h5
+    - subject0032.h5
+    - subject0033.h5
+    - subject0035.h5
+    - subject0036.h5
+    - subject0038.h5
+    - subject0039.h5
+    - subject0040.h5
+    - subject0041.h5
+    - subject0043.h5
+    - subject0044.h5
+    - subject0045.h5
+    - subject0046.h5
+    - subject0048.h5
+    - subject0050.h5
+    - subject0051.h5
+    - subject0052.h5
+    - subject0055.h5
+    - subject0056.h5
+    - subject0057.h5
+    - subject0058.h5
+    - subject0059.h5
+    - subject0060.h5
+    - subject0061.h5
+    - subject0062.h5
+    - subject0063.h5
+    - subject0065.h5
+    - subject0066.h5
+    - subject0067.h5
+    - subject0069.h5
+    - subject0072.h5
+    - subject0073.h5
+    - subject0075.h5
+    - subject0076.h5
+    - subject0078.h5
+    - subject0079.h5
+    - subject0080.h5
+    - subject0081.h5
+    - subject0083.h5
+    - subject0084.h5
+    - subject0085.h5
+    - subject0088.h5
+    - subject0090.h5
+    - subject0092.h5
+    - subject0095.h5
+    - subject0098.h5
+    - subject0099.h5
+    - subject0100.h5
+    - subject0101.h5
+    - subject0102.h5
+    - subject0103.h5
+    - subject0104.h5
+    - subject0105.h5
+    - subject0106.h5
+    - subject0107.h5
+    - subject0108.h5
+    - subject0109.h5
+    - subject0111.h5

unigaze/configs/data/xgaze_60_80sub.yaml ADDED Viewed

	@@ -0,0 +1,31 @@

+type: datasets.xgaze.XGazeDataset
+params:
+  data_name: xgaze_v2_224
+  images_per_frame: 18
+  color_type: bgr
+  transform_type: 'basic_imagenet'
+  dataset_path: null
+  frame_tag: 'all'
+  image_size: 224
+  keys_to_use:
+    - subject0083.h5
+    - subject0084.h5
+    - subject0085.h5
+    - subject0088.h5
+    - subject0090.h5
+    - subject0092.h5
+    - subject0095.h5
+    - subject0098.h5
+    - subject0099.h5
+    - subject0100.h5
+    - subject0101.h5
+    - subject0102.h5
+    - subject0103.h5
+    - subject0104.h5
+    - subject0105.h5
+    - subject0106.h5
+    - subject0107.h5
+    - subject0108.h5
+    - subject0109.h5
+    - subject0111.h5

unigaze/configs/exp/blank.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+exp_name: tbd
+exp_explanation:
+data: null
+## can be overwritten
+model: null
+trainer:  null
+loss:  null
+optimizer: configs/optimizers/default_Adam_e4.yaml
+scheduler: configs/schedulers/default_stepLR_5.yaml

unigaze/configs/exp/cross/train_ED.yaml ADDED Viewed

	@@ -0,0 +1,27 @@

+train:
+  - configs/data/eyediap_cs.yaml
+  - configs/data/eyediap_ft.yaml
+val:
+  - configs/data/mpiigaze_train.yaml
+  - configs/data/mpiigaze_test.yaml
+test:
+  # - configs/data/xgaze_0_80sub.yaml
+  - configs/data/xgaze_0_60sub.yaml
+  - configs/data/xgaze_60_80sub.yaml
+  - configs/data/gazecapture_train.yaml
+  - configs/data/gazecapture_test.yaml
+  - configs/data/gaze360_train.yaml
+  - configs/data/gaze360_test.yaml
+  - configs/data/mpiigaze.yaml
+  - configs/data/our_mpii.yaml