Gaze-LLE

Runtime error

App Files Files Community

Gaze-LLE / data_prep /preprocess_vat.py

fffiloni

Migrated from GitHub

9c9498f verified over 1 year ago

raw

history blame contribute delete

4.94 kB

	import argparse
	import glob
	from functools import reduce
	import os
	import pandas as pd
	import json
	import numpy as np
	from PIL import Image

	# Command-line interface: --data_path is the dataset root handed to main().
	# NOTE: arguments are parsed at module level, so this runs whenever the file
	# is executed or imported, not only under the __main__ guard.
	parser = argparse.ArgumentParser()
	parser.add_argument("--data_path", type=str, default="./data/videoattentiontarget")
	args = parser.parse_args()

	# preprocessing adapted from https://github.com/ejcgt/attention-target-detection/blob/master/dataset.py

	def merge_dfs(ls):
	    """Outer-join per-person annotation frames on their shared ``path`` column.

	    Every column other than ``path`` is suffixed with ``_df{i}``, where ``i``
	    is the frame's position in ``ls``, so the joined result has unique column
	    names. The renaming is applied to copies; the frames in ``ls`` are left
	    untouched.

	    Args:
	        ls: Sequence of DataFrames, each containing a ``path`` column.

	    Returns:
	        A DataFrame sorted by ``path`` with a fresh 0..n-1 index. A ``path``
	        that is absent from one of the inputs gets missing values (NaN) in
	        that input's columns. An empty ``ls`` yields an empty DataFrame that
	        only has a ``path`` column.
	    """
	    renamed = []
	    for i, df in enumerate(ls):
	        unique = df.copy()  # keep the caller's frame intact
	        # give columns unique names
	        unique.columns = [
	            col if col == "path" else f"{col}_df{i}" for col in df.columns
	        ]
	        renamed.append(unique)

	    if not renamed:
	        # reduce() without an initial value raises on an empty sequence.
	        return pd.DataFrame(columns=["path"])

	    merged_df = reduce(
	        lambda left, right: pd.merge(left, right, on=["path"], how="outer"),
	        renamed,
	    )
	    return merged_df.sort_values(by=["path"]).reset_index(drop=True)

	def smooth_by_conv(window_size, df, col):
	    """Temporal smoothing on labels to match original VideoAttTarget evaluation.

	    Adapted from https://github.com/ejcgt/attention-target-detection/blob/acd264a3c9e6002b71244dea8c1873e5c5818500/utils/myutils.py

	    The column is extended on both sides by ``window_size // 2`` copies of its
	    first / last value and then averaged with a uniform kernel of length
	    ``window_size``.

	    Args:
	        window_size: Length of the moving-average window. With an odd window
	            the result has one value per row; with an even window the
	            padding is one element too long and the result has
	            ``len(df) + 1`` values.
	        df: DataFrame holding the track.
	        col: Name of the column to smooth.

	    Returns:
	        1-D numpy array with the smoothed values; an empty array when the
	        column has no rows.
	    """
	    values = df[col].values
	    if len(values) == 0:
	        # Nothing to pad or convolve; indexing values[0] would raise here.
	        return np.empty(0, dtype=float)

	    # "edge" mode repeats the first/last sample, so the averaged track keeps
	    # its end values instead of being pulled towards zero.
	    padded_track = np.pad(values, window_size // 2, mode="edge")
	    kernel = np.ones(window_size) / window_size
	    return np.convolve(padded_track, kernel, mode="valid")

	def smooth_df(window_size, df):
	    """Replace the bounding-box columns of ``df`` with their smoothed versions.

	    ``xmin``, ``ymin``, ``xmax`` and ``ymax`` are each passed through
	    ``smooth_by_conv`` with the given ``window_size`` and written back into
	    ``df``. The frame is modified in place and also returned.
	    """
	    for box_col in ("xmin", "ymin", "xmax", "ymax"):
	        df[box_col] = smooth_by_conv(window_size, df, box_col)
	    return df


	def _head_annotation(values, width, height):
	    """Build the output entry for one person's head box and gaze point in a frame."""
	    xmin, ymin, xmax, ymax, gazex, gazey = values
	    # match original benchmark's preprocessing of annotations: when only one
	    # gaze coordinate is negative it is clamped to 0
	    if gazex >= 0 and gazey < 0:
	        gazey = 0
	    elif gazey >= 0 and gazex < 0:
	        gazex = 0
	    # inout is 1 only when both gaze coordinates are non-negative
	    inout = int(gazex >= 0 and gazey >= 0)
	    return {
	        "bbox": [xmin, ymin, xmax, ymax],
	        "bbox_norm": [
	            xmin / float(width),
	            ymin / float(height),
	            xmax / float(width),
	            ymax / float(height),
	        ],
	        "gazex": [gazex],
	        "gazex_norm": [gazex / float(width)],
	        "gazey": [gazey],
	        "gazey_norm": [gazey / float(height)],
	        "inout": inout,
	    }


	def _preprocess_sequence(PATH, seq_path, window_size=11):
	    """Convert one annotation directory into a sequence dict; also return its person count."""
	    # The last two directory names of the annotation path locate the frames
	    # under "images". normpath + os.sep keeps this working regardless of the
	    # separator glob returned.
	    seq_img_path = os.path.join(
	        "images", *os.path.normpath(seq_path).split(os.sep)[-2:]
	    )
	    seq_img_dir = os.path.join(PATH, seq_img_path)
	    # One frame is read to get the size used for all normalized coordinates
	    # of the sequence.
	    sample_image = os.path.join(seq_img_dir, os.listdir(seq_img_dir)[0])
	    with Image.open(sample_image) as img:
	        width, height = img.size

	    # One annotation file per person track; sorted so the order of heads in
	    # the output does not depend on filesystem listing order.
	    person_files = sorted(glob.glob(os.path.join(seq_path, "*")))
	    num_ppl = len(person_files)
	    person_dfs = [
	        pd.read_csv(
	            file,
	            header=None,
	            index_col=False,
	            names=["path", "xmin", "ymin", "xmax", "ymax", "gazex", "gazey"],
	        )
	        for file in person_files
	    ]
	    # moving-avg smoothing to match original benchmark's evaluation
	    person_dfs = [smooth_df(window_size, df) for df in person_dfs]
	    # merge annotations per person for same frames
	    merged_df = merge_dfs(person_dfs)

	    frames = []
	    for _, row in merged_df.iterrows():
	        heads = []
	        # Column 0 is "path"; each person then owns 6 consecutive columns.
	        for i in range(1, num_ppl * 6 + 1, 6):
	            # if it's nan lack of continuity (one person leaving the frame
	            # for a period of time)
	            if np.isnan(row.iloc[i]):
	                continue
	            heads.append(
	                _head_annotation(row.iloc[i:i + 6].values.tolist(), width, height)
	            )
	        frames.append(
	            {"path": os.path.join(seq_img_path, row["path"]), "heads": heads}
	        )

	    seq_dict = {
	        "path": seq_img_path,
	        "width": width,
	        "height": height,
	        "frames": frames,
	    }
	    return seq_dict, num_ppl


	def main(PATH):
	    """Preprocess the annotations by sequence and person track.

	    For each of the "train" and "test" splits, every directory matching
	    ``<PATH>/annotations/<split>/*/*`` is converted into a dict with the
	    keys ``path``, ``width``, ``height`` and ``frames``. The list of these
	    dicts is written to ``<PATH>/<split>_preprocessed.json``. The maximum
	    number of people per sequence and the number of sequences are printed
	    for each split.

	    Args:
	        PATH: Dataset root containing the ``annotations`` and ``images``
	            directories.
	    """
	    for split in ("train", "test"):
	        sequences = []
	        max_num_ppl = 0
	        # NOTE(review): the last two pattern components were empty strings,
	        # which can only match the split directory itself, while the code
	        # below consumes the final two path components of every match. They
	        # are restored to wildcards (presumably <show>/<clip>, as in the
	        # referenced upstream dataset.py) - confirm against the dataset.
	        pattern = os.path.join(PATH, "annotations", split, "*", "*")
	        # sorted() makes the sequence order in the JSON reproducible.
	        for seq_path in sorted(glob.glob(pattern)):
	            seq_dict, num_ppl = _preprocess_sequence(PATH, seq_path)
	            max_num_ppl = max(max_num_ppl, num_ppl)
	            sequences.append(seq_dict)

	        print("{} max people per image {}".format(split, max_num_ppl))
	        print("{} num unique video sequences {}".format(split, len(sequences)))

	        out_path = os.path.join(PATH, "{}_preprocessed.json".format(split))
	        # Context manager guarantees the JSON is flushed and the handle closed.
	        with open(out_path, "w") as out_file:
	            json.dump(sequences, out_file)

	# Script entry point: run the preprocessing on the --data_path parsed above.
	if __name__ == "__main__":
	main(args.data_path)