| | """ |
| | Mostly adapted from: https://github.com/sascha2schroeder/popEye |
| | """ |
| |
|
| | from copy import deepcopy |
| | import numpy as np |
| | import pandas as pd |
| | from icecream import ic |
| | from scipy import stats |
| | import pathlib as pl |
| |
|
# Default directory for analysis output files (used elsewhere in the package).
RESULTS_FOLDER = pl.Path("results")
| |
|
| |
|
def compute_velocity(xy, samp=1000):
    """Compute per-sample gaze velocity from a gaze-position frame.

    Interior samples use the 5-point central-difference filter of
    Engbert & Kliegl (2003): v[i] = samp/6 * (x[i+2] + x[i+1] - x[i-1] - x[i-2]).
    The two samples next to the edges fall back to a 2-point central
    difference; the first and last samples keep a velocity of 0.

    Parameters
    ----------
    xy : pd.DataFrame
        Gaze samples; column 0 must be "time" and columns 1-2 the x/y
        positions (the code indexes positions positionally via iloc[:, 1:3]).
    samp : int, optional
        Sampling rate in Hz (default 1000, the previously hard-coded value).

    Returns
    -------
    pd.DataFrame
        `xy` with "vx" and "vy" columns joined on "time".
    """
    N = xy.shape[0]
    v = pd.DataFrame(data=np.zeros((N, 3)), columns=["time", "vx", "vy"])
    v["time"] = xy["time"]

    # 5-point central difference for interior samples 2 .. N-3.
    v.iloc[2 : (N - 2), 1:3] = (
        samp
        / 6
        * (
            xy.iloc[4:N, 1:3].values
            + xy.iloc[3 : (N - 1), 1:3].values
            - xy.iloc[1 : (N - 3), 1:3].values
            - xy.iloc[0 : (N - 4), 1:3].values
        )
    )
    # 2-point central difference at the edges. Sample 1 uses neighbours 2 and 0;
    # symmetrically, sample N-2 uses neighbours N-1 and N-3 (the original code
    # read N-4 here — an off-by-one that inflated the last edge velocity).
    v.iloc[1, 1:3] = samp / 2 * (xy.iloc[2, 1:3].values - xy.iloc[0, 1:3].values)
    v.iloc[(N - 2), 1:3] = samp / 2 * (xy.iloc[N - 1, 1:3].values - xy.iloc[N - 3, 1:3].values)

    xy = pd.concat([xy.set_index("time"), v.set_index("time")], axis=1).reset_index()
    return xy
| |
|
| |
|
def event_long(events_df):
    """Clean an event table and annotate blink adjacency.

    Steps:
    1. Drop events with non-positive duration (stop - start <= 0).
    2. Merge each BLINK event with its predecessor: the BLINK row inherits
       every non-"msg" column from the preceding event, and the preceding
       event is deleted (the blink absorbs it). A BLINK at index 0 has no
       predecessor and is left untouched.
    3. Flag the events immediately before ("blink_after") and after
       ("blink_before") each blink, and derive a combined "blink" flag.

    Parameters
    ----------
    events_df : pd.DataFrame
        Must contain "start", "stop" and "msg" columns; rows with
        msg == "BLINK" mark blink events. Mutated in place during processing.

    Returns
    -------
    pd.DataFrame
        Filtered copy with "num", "blink_before", "blink_after" and
        "blink" columns added.
    """
    events_df["duration"] = events_df["stop"] - events_df["start"]
    events_df = events_df[events_df["duration"] > 0]
    events_df = events_df.drop(columns=["duration"])
    events_df.reset_index(drop=True, inplace=True)
    tmplong_cols = list(events_df.columns)
    tmplong_cols.remove("msg")
    events_df["del"] = 0
    for i in events_df.index:
        if events_df.loc[i, "msg"] == "BLINK":
            if i == 0:
                continue
            # BLINK inherits the predecessor's data; predecessor is removed.
            for col in tmplong_cols:
                events_df.loc[i, col] = events_df.loc[i - 1, col]
            events_df.loc[i - 1, "del"] = 1

    events_df = events_df[events_df["del"] == 0]
    events_df = events_df.drop(columns=["del"])
    events_df.reset_index(drop=True, inplace=True)
    events_df["num"] = range(len(events_df))

    events_df["blink_before"] = 0
    events_df["blink_after"] = 0

    for i in events_df.index:
        if events_df.loc[i, "msg"] == "BLINK":
            # Guard i > 0: the original wrote to .loc[i - 1] unconditionally,
            # which for a blink at index 0 silently created a phantom row
            # with index label -1.
            if i > 0:
                events_df.loc[i - 1, "blink_after"] = 1
            if i < len(events_df) - 1:
                events_df.loc[i + 1, "blink_before"] = 1

    events_df["blink"] = (events_df["blink_before"] == 1) | (events_df["blink_after"] == 1)
    return events_df.copy()
| |
|
| |
|
def compute_non_line_dependent_saccade_measures(saccade_df, trial_dict):
    """Add amplitude and angle measures to each saccade that do not depend
    on any line-assignment algorithm.

    Expects saccade_df to carry "start_time", "end_time", "xs", "xe" and
    "angle" columns, and trial_dict to provide "trial_id" and "gaze_df"
    (with "time", "x" and — unless calibration_method is "H3" — "y").

    NOTE(review): mutates trial_dict["gaze_df"] in place (may write NaNs
    into "x") as well as saccade_df, which is also returned.
    """
    saccade_df["trial_id"] = trial_dict["trial_id"]
    gaze_df = trial_dict["gaze_df"]
    for s in range(len(saccade_df)):
        is_directional_deviation = False
        a = saccade_df["start_time"][s]
        b = saccade_df["end_time"][s]

        # If no sample inside [a, b] has a truthy x value, blank the stretch.
        # NOTE(review): .loc[a:b] slices by index *label* while the mask is
        # built from the "time" column — these only agree when the index
        # equals the time values; verify against the caller.
        if not gaze_df["x"][[True if (a <= x <= b) else False for x in gaze_df["time"]]].any():
            gaze_df.loc[a:b, "x"] = np.nan

        bool_vec = (gaze_df["time"] >= a) & (gaze_df["time"] <= b)
        if (not gaze_df["x"][bool_vec].isna().any()) and bool_vec.any():
            # Horizontal (and, unless calibration is "H3", vertical) extent
            # of the saccade; the sign of dX/dY encodes direction: positive
            # when the minimum occurs before the maximum in time.
            minx = min(gaze_df.loc[bool_vec, "x"])
            maxx = max(gaze_df.loc[bool_vec, "x"])
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                miny = min(gaze_df.loc[bool_vec, "y"])
                maxy = max(gaze_df.loc[bool_vec, "y"])
            ix1 = gaze_df.loc[bool_vec, "x"].index[np.argmin(gaze_df.loc[bool_vec, "x"])]
            ix2 = gaze_df.loc[bool_vec, "x"].index[np.argmax(gaze_df.loc[bool_vec, "x"])]
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                iy1 = gaze_df.loc[bool_vec, "y"].index[np.argmin(gaze_df.loc[bool_vec, "y"])]
                iy2 = gaze_df.loc[bool_vec, "y"].index[np.argmax(gaze_df.loc[bool_vec, "y"])]
            saccade_df.loc[s, "dX"] = round(np.sign(ix2 - ix1) * (maxx - minx))
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                saccade_df.loc[s, "dY"] = round(np.sign(iy2 - iy1) * (maxy - miny))

        # Amplitude in pixels plus angle in radians and degrees; the "H3"
        # calibration apparently has no usable vertical signal, so the
        # amplitude measures are undefined there.
        if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
            saccade_df.loc[s, "amp_px"] = round(
                np.sqrt(saccade_df.loc[s, "dX"] ** 2 + saccade_df.loc[s, "dY"] ** 2)
            )
            saccade_df.loc[s, "amp_angle"] = round(np.arctan2(saccade_df.loc[s, "dY"], saccade_df.loc[s, "dX"]), 2)
            saccade_df.loc[s, "amp_angle_deg"] = round(
                np.arctan2(saccade_df.loc[s, "dY"], saccade_df.loc[s, "dX"]) * (180 / np.pi), 2
            )
        else:
            saccade_df.loc[s, "amp_px"] = np.nan
            saccade_df.loc[s, "amp_angle"] = np.nan
            saccade_df.loc[s, "amp_angle_deg"] = np.nan

        # Mostly-vertical (35°-145°) but rightward saccade not adjacent to a
        # blink counts as a directional deviation.
        if 35 <= abs(saccade_df.loc[s, "angle"]) <= 145:
            if saccade_df.loc[s, "xe"] - saccade_df.loc[s, "xs"] > 0 and not (
                "blink_before" in saccade_df.columns
                and (saccade_df.loc[s, "blink_before"] or saccade_df.loc[s, "blink_after"])
            ):
                is_directional_deviation = True

        saccade_df.loc[s, "is_directional_deviation"] = is_directional_deviation

    return saccade_df
| |
|
| |
|
def compute_saccade_measures(saccade_df, trial_dict, algo_choice):
    """Add line-dependent saccade measures for one line-assignment algorithm.

    Computes, per saccade: whether it changes line ("has_line_change"),
    whether it lands exactly one line down ("goes_to_next_line"), whether it
    is a return sweep, the letter distance travelled ("dist_let"), the peak
    velocity inside the saccade window ("peak_vel") and the directional-
    deviation flag. Column names are suffixed with the algorithm name
    (algo_choice is None -> no suffix).

    NOTE(review): mutates trial_dict["gaze_df"] in place (may write NaNs
    into "x"); returns a copy of the mutated saccade_df.
    """
    if algo_choice is not None:
        algo_str = f"_{algo_choice}"
    else:
        algo_str = ""
    gaze_df = trial_dict["gaze_df"]
    saccade_df.reset_index(drop=True, inplace=True)
    # Line change flags derived from start ("lines") / end ("linee") lines.
    saccade_df.loc[:, f"has_line_change{algo_str}"] = (
        saccade_df.loc[:, f"lines{algo_str}"] != saccade_df.loc[:, f"linee{algo_str}"]
    )
    saccade_df.loc[:, f"goes_to_next_line{algo_str}"] = saccade_df.loc[:, f"linee{algo_str}"] == (
        saccade_df.loc[:, f"lines{algo_str}"] + 1
    )
    saccade_df.loc[:, f"is_directional_deviation{algo_str}"] = False
    saccade_df.loc[:, f"is_return_sweep{algo_str}"] = False

    # The last saccade starting on a line that lands on the following line
    # is that line's return sweep.
    for sidx, subdf in saccade_df.groupby(f"lines{algo_str}"):
        if subdf.iloc[-1][f"goes_to_next_line{algo_str}"]:
            saccade_df.loc[subdf.index[-1], f"is_return_sweep{algo_str}"] = True

    for s in range(len(saccade_df)):
        is_directional_deviation = False
        a = saccade_df["start_time"][s]
        b = saccade_df["end_time"][s]

        # If no sample inside [a, b] has a truthy x value, blank the stretch.
        # NOTE(review): .loc[a:b] slices by index label while the mask is
        # built from the "time" column — these only agree when the index
        # equals the time values; verify against the caller.
        if not gaze_df["x"][[True if (a <= x <= b) else False for x in gaze_df["time"]]].any():
            gaze_df.loc[a:b, "x"] = np.nan

        # Letter distance: end letter minus start letter of the saccade.
        if saccade_df.loc[s, f"lete{algo_str}"] is None or saccade_df.loc[s, f"lets{algo_str}"] is None:
            ic(
                f"None found for compute_saccade_measures at index {s} for subj {trial_dict['subject']} and trial {trial_dict['trial_id']}"
            )
        else:
            saccade_df.loc[s, f"dist_let{algo_str}"] = (
                saccade_df.loc[s, f"lete{algo_str}"] - saccade_df.loc[s, f"lets{algo_str}"]
            )

        bool_vec = (gaze_df["time"] >= a) & (gaze_df["time"] <= b)
        if (not gaze_df["x"][bool_vec].isna().any()) and bool_vec.any():
            # Peak velocity over the saccade window; the "H3" calibration
            # apparently has no usable vy, so only vx contributes there.
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                vx = gaze_df.vx[bool_vec]
                vy = gaze_df.vy[bool_vec]
                if not vx.empty and not vy.empty:
                    saccade_df.loc[s, f"peak_vel{algo_str}"] = round(np.nanmax(np.sqrt(vx**2 + vy**2)))
            else:
                saccade_df.loc[s, f"peak_vel{algo_str}"] = round(np.nanmax(np.sqrt(gaze_df.vx[bool_vec] ** 2)))

        # Mostly-vertical (35°-145°) but rightward saccade not adjacent to a
        # blink counts as a directional deviation.
        if 35 <= abs(saccade_df.loc[s, f"angle{algo_str}"]) <= 145:
            if saccade_df.loc[s, "xe"] - saccade_df.loc[s, "xs"] > 0 and not (
                "blink_before" in saccade_df.columns
                and (saccade_df.loc[s, "blink_before"] or saccade_df.loc[s, "blink_after"])
            ):
                is_directional_deviation = True

        saccade_df.loc[s, f"is_directional_deviation{algo_str}"] = is_directional_deviation
    return saccade_df.copy()
| |
|
| |
|
def get_angle_and_eucl_dist(saccade_df, algo_choice=None):
    """Add displacement, Euclidean distance and angle columns per saccade.

    Writes "xe_minus_xs", "ye_minus_ys<suffix>", "eucledian_distance" and
    "angle<suffix>" (degrees, from np.arctan2); <suffix> is "_<algo_choice>"
    or empty when algo_choice is None. Mutates and returns saccade_df.
    """
    suffix = "" if algo_choice is None else f"_{algo_choice}"
    dx = saccade_df["xe"] - saccade_df["xs"]
    dy = saccade_df[f"ye{suffix}"] - saccade_df[f"ys{suffix}"]
    saccade_df["xe_minus_xs"] = dx
    saccade_df[f"ye_minus_ys{suffix}"] = dy
    saccade_df["eucledian_distance"] = np.sqrt(dx**2 + dy**2)
    saccade_df[f"angle{suffix}"] = np.arctan2(dy, dx) * (180 / np.pi)
    return saccade_df
| |
|
| |
|
def compute_saccade_length(dffix, stimulus_df, algo_choice):
    """Compute incoming ("sac_in") and outgoing ("sac_out") saccade lengths
    in letters for every fixation, for one line-assignment algorithm.

    Within a line the length is the plain letter-index difference. Across
    lines each fixation's letter index is first made line-relative by
    subtracting the first letter index of its line, looked up in
    `stimulus_df` (columns "assigned_line" and "letternum").
    Mutates and returns dffix.
    """
    line_col = f"line_num_{algo_choice}"
    letter_col = f"letternum_{algo_choice}"
    sac_in_col = f"sac_in_{algo_choice}"
    sac_out_col = f"sac_out_{algo_choice}"

    def line_start(line_value):
        # First (smallest) letter index on the given stimulus line.
        return np.min(stimulus_df[stimulus_df["assigned_line"] == line_value]["letternum"])

    # Incoming saccade: from fixation j-1 to fixation j.
    for j in dffix.index:
        if (
            j == 0
            or pd.isna(dffix.at[j, line_col])
            or pd.isna(dffix.at[j - 1, line_col])
            or dffix.at[j, letter_col] is None
            or dffix.at[j - 1, letter_col] is None
        ):
            continue
        prev_line = dffix.at[j - 1, line_col]
        cur_line = dffix.at[j, line_col]
        prev_letter = dffix.at[j - 1, letter_col]
        cur_letter = dffix.at[j, letter_col]
        if prev_line == cur_line:
            dffix.at[j, sac_in_col] = cur_letter - prev_letter
        elif prev_line < cur_line:
            # Forward line change: compare line-relative letter positions.
            dffix.at[j, sac_in_col] = (cur_letter - line_start(cur_line)) - (prev_letter - line_start(prev_line))
        else:
            # Backward line change (regression to an earlier line).
            dffix.at[j, sac_in_col] = (prev_letter - line_start(prev_line)) - (cur_letter - line_start(cur_line))

    # Outgoing saccade: from fixation j to fixation j+1.
    for j in range(len(dffix) - 1):
        if (
            pd.isna(dffix.at[j, line_col])
            or pd.isna(dffix.at[j + 1, line_col])
            or dffix.at[j + 1, letter_col] is None
            or dffix.at[j, letter_col] is None
        ):
            continue
        cur_line = dffix.at[j, line_col]
        next_line = dffix.at[j + 1, line_col]
        cur_letter = dffix.at[j, letter_col]
        next_letter = dffix.at[j + 1, letter_col]
        if next_line == cur_line:
            dffix.at[j, sac_out_col] = next_letter - cur_letter
        elif next_line > cur_line:
            dffix.at[j, sac_out_col] = (next_letter - line_start(next_line)) - (cur_letter - line_start(cur_line))
        else:
            dffix.at[j, sac_out_col] = (cur_letter - line_start(cur_line)) - (next_letter - line_start(next_line))

    return dffix
| |
|
| |
|
def compute_launch_distance(dffix, algo_choice):
    """Compute the launch distance ("word_launch") for each fixation.

    For a forward incoming saccade (sac_in >= 0) the launch distance is the
    incoming saccade length minus the current landing position; for a
    backward one it is the saccade length plus the previous fixation's
    landing position. The first row and rows with NaN sac_in are skipped.
    Mutates and returns dffix.
    """
    sac_col = f"sac_in_{algo_choice}"
    land_col = f"word_land_{algo_choice}"
    launch_col = f"word_launch_{algo_choice}"

    for idx in range(1, dffix.shape[0]):
        sac = dffix.loc[idx, sac_col]
        if pd.isna(sac):
            continue
        if sac >= 0:
            dffix.loc[idx, launch_col] = sac - dffix.loc[idx, land_col]
        else:
            dffix.loc[idx, launch_col] = sac + dffix.loc[idx - 1, land_col]

    return dffix
| |
|
| |
|
def compute_refixation(dffix, algo_choice):
    """Mark fixations that land on the same word/sentence as the previous one.

    Adds boolean "word_refix" and "sentence_refix" columns (suffixed with the
    algorithm name). Row 0 and rows whose word number (current or previous)
    is NaN keep the default False. Mutates and returns dffix.
    """
    word_col = f"on_word_number_{algo_choice}"
    sent_col = f"on_sentence_num_{algo_choice}"
    word_refix_col = f"word_refix_{algo_choice}"
    sent_refix_col = f"sentence_refix_{algo_choice}"

    dffix.loc[:, word_refix_col] = False
    dffix.loc[:, sent_refix_col] = False
    for idx in dffix.index:
        if idx == 0:
            continue
        if pd.isna(dffix.loc[idx, word_col]) or pd.isna(dffix.loc[idx - 1, word_col]):
            continue
        dffix.loc[idx, word_refix_col] = dffix.loc[idx, word_col] == dffix.loc[idx - 1, word_col]
        dffix.loc[idx, sent_refix_col] = dffix.loc[idx, sent_col] == dffix.loc[idx - 1, sent_col]
    return dffix
| |
|
| |
|
def compute_regression(dffix, algo_choice):
    """Flag regressions between words and between sentences.

    For every pair of consecutive fixations, a fixation landing on an
    earlier word (sentence) than its predecessor is marked as a regression
    "in", the predecessor as the matching regression "out", and the
    word/sentence numbers involved are recorded in the *_from/*_to columns.
    The new columns are outer-merged back onto dffix by "fixation_number".
    """
    tmp = dffix.copy()
    # Fresh RangeIndex so positional .iloc[j] and label .loc[j] coincide below.
    tmp.reset_index(drop=True, inplace=True)
    tmp.loc[:, f"word_reg_out_{algo_choice}"] = False
    tmp.loc[:, f"word_reg_in_{algo_choice}"] = False
    tmp.loc[:, f"word_reg_out_to_{algo_choice}"] = float("nan")
    tmp.loc[:, f"word_reg_in_from_{algo_choice}"] = float("nan")
    tmp.loc[:, f"sentence_reg_out_{algo_choice}"] = False
    tmp.loc[:, f"sentence_reg_in_{algo_choice}"] = False
    tmp.loc[:, f"sentence_reg_out_to_{algo_choice}"] = float("nan")
    tmp.loc[:, f"sentence_reg_in_from_{algo_choice}"] = float("nan")

    if len(tmp) > 1:
        for j in range(1, len(tmp)):
            # Skip pairs without a word assignment on either side.
            if pd.isnull(tmp.iloc[j][f"on_word_number_{algo_choice}"]) or pd.isnull(
                tmp.iloc[j - 1][f"on_word_number_{algo_choice}"]
            ):
                continue

            # Fixation j lands on an earlier word than j-1: word regression.
            if tmp.iloc[j][f"on_word_number_{algo_choice}"] < tmp.iloc[j - 1][f"on_word_number_{algo_choice}"]:
                tmp.loc[j, f"word_reg_in_{algo_choice}"] = True
                tmp.loc[j - 1, f"word_reg_out_{algo_choice}"] = True
                tmp.loc[j, f"word_reg_in_from_{algo_choice}"] = tmp.iloc[j - 1][f"on_word_number_{algo_choice}"]
                tmp.loc[j - 1, f"word_reg_out_to_{algo_choice}"] = tmp.iloc[j][f"on_word_number_{algo_choice}"]

            # Same logic on the sentence level (NaN comparisons are False).
            if tmp.iloc[j][f"on_sentence_num_{algo_choice}"] < tmp.iloc[j - 1][f"on_sentence_num_{algo_choice}"]:
                tmp.loc[j, f"sentence_reg_in_{algo_choice}"] = True
                tmp.loc[j - 1, f"sentence_reg_out_{algo_choice}"] = True
                tmp.loc[j, f"sentence_reg_in_from_{algo_choice}"] = tmp.iloc[j - 1][f"on_sentence_num_{algo_choice}"]
                tmp.loc[j - 1, f"sentence_reg_out_to_{algo_choice}"] = tmp.iloc[j][f"on_sentence_num_{algo_choice}"]

    # Only the columns created above that dffix does not already have.
    extra_cols = list(set(tmp.columns) - set(dffix.columns))
    cols_to_add = ["fixation_number"] + extra_cols

    # Outer merge keeps every fixation even if it received no flags.
    dffix = pd.merge(dffix, tmp[cols_to_add], on="fixation_number", how="outer")
    return dffix
| |
|
| |
|
def compute_firstskip(dffix, algo_choice):
    """Mark words/sentences that were skipped during first-pass reading.

    A word (sentence) is "first-skipped" when its number lies below the
    running maximum of numbers fixated so far while it was never fixated
    itself — i.e. it was jumped over and only reached later, if ever.
    Rows without a line assignment get NaN instead of 0/1.
    Mutates and returns dffix.
    """
    dffix[f"word_firstskip_{algo_choice}"] = 0
    word_mem = []  # word numbers fixated so far

    dffix[f"sentence_firstskip_{algo_choice}"] = 0
    sentence_mem = []  # sentence numbers fixated so far
    # Move the index into an "index" column so .loc[j] below is purely
    # positional; restored via set_index before returning.
    dffix.reset_index(inplace=True)
    for j in range(dffix.shape[0]):
        # Current word lies before the furthest word seen and was never
        # fixated itself -> skipped on the first pass. np.max(..., initial=0)
        # makes the comparison safe on the first iteration (empty memory).
        if (
            dffix.loc[j, f"on_word_number_{algo_choice}"] < np.max(word_mem, initial=0)
            and dffix.loc[j, f"on_word_number_{algo_choice}"] not in word_mem
        ):
            dffix.loc[j, f"word_firstskip_{algo_choice}"] = 1

        # Same logic on the sentence level.
        if (
            dffix.loc[j, f"on_sentence_num_{algo_choice}"] < np.max(sentence_mem, initial=0)
            and dffix.loc[j, f"on_sentence_num_{algo_choice}"] not in sentence_mem
        ):
            dffix.loc[j, f"sentence_firstskip_{algo_choice}"] = 1

        word_mem.append(dffix.loc[j, f"on_word_number_{algo_choice}"])
        sentence_mem.append(dffix.loc[j, f"on_sentence_num_{algo_choice}"])

    # Fixations without a line assignment get no skip verdict.
    dffix.loc[dffix[f"line_num_{algo_choice}"].isna(), f"word_firstskip_{algo_choice}"] = np.nan
    dffix.loc[dffix[f"line_num_{algo_choice}"].isna(), f"sentence_firstskip_{algo_choice}"] = np.nan
    dffix.set_index("index", inplace=True)
    return dffix
| |
|
| |
|
def compute_run(dffix, algo_choice):
    """Number reading "runs" (gaze passes) over words and sentences.

    A new run starts at a fixation flagged as a regression-in when the
    previous fixation was not. Derived columns (all suffixed with the
    algorithm name):
      - word_runid / sentence_runid: run counter at each fixation,
      - word_fix / sentence_fix: rank of the fixation within its word/sentence,
      - word_run / sentence_run: which pass over the word/sentence this is,
      - word_run_fix / sentence_run_fix: rank of the fixation inside that run.
    Results are merged back onto dffix by "fixation_number".
    """
    if "fixation_number" not in dffix.columns and "num" in dffix.columns:
        dffix["fixation_number"] = dffix["num"]
    tmp = dffix.copy()
    tmp.reset_index(inplace=True, drop=True)
    # Initialise run ids; word run ids only where a word was assigned.
    tmp.loc[~tmp[f"on_word_{algo_choice}"].isna(), f"word_runid_{algo_choice}"] = 0
    tmp[f"sentence_runid_{algo_choice}"] = 0

    # Increment the run id at the start of each regression-in episode.
    if len(tmp) > 1:
        for j in range(1, len(tmp)):

            # Word level: regression-in starts here but not at j-1.
            if tmp[f"word_reg_in_{algo_choice}"][j] == 1 and tmp[f"word_reg_in_{algo_choice}"][j - 1] != 1:
                tmp.loc[j, f"word_runid_{algo_choice}"] = tmp[f"word_runid_{algo_choice}"][j - 1] + 1
            else:
                tmp.loc[j, f"word_runid_{algo_choice}"] = tmp.loc[j - 1, f"word_runid_{algo_choice}"]

            # Sentence level: same rule.
            if tmp[f"sentence_reg_in_{algo_choice}"][j] == 1 and tmp[f"sentence_reg_in_{algo_choice}"][j - 1] != 1:
                tmp.loc[j, f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"][j - 1] + 1
            else:
                tmp.loc[j, f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"][j - 1]
    # NOTE(review): run ids are shifted down by one here, so the first run
    # carries id -1 — presumably compensated downstream; confirm intended.
    tmp[f"word_runid_{algo_choice}"] = tmp[f"word_runid_{algo_choice}"] - 1
    tmp[f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"] - 1
    # Rank of each fixation within its word (1 = first fixation on the word).
    tmp[f"word_fix_{algo_choice}"] = tmp.groupby(f"on_word_number_{algo_choice}")["fixation_number"].transform(
        lambda x: stats.rankdata(x, method="min")
    )
    # Rank of each fixation within its sentence.
    tmp[f"sentence_fix_{algo_choice}"] = tmp.groupby(f"on_sentence_num_{algo_choice}")["fixation_number"].transform(
        lambda x: stats.rankdata(x, method="min")
    )

    # Rank the distinct runs over each word: word_run = 1 for the first pass,
    # 2 for the second, etc. "id" keys a (word, runid) pair.
    tmp["id"] = tmp[f"on_word_number_{algo_choice}"].astype(str) + ":" + tmp[f"word_runid_{algo_choice}"].astype(str)
    fix_tmp = tmp.copy().drop_duplicates(subset="id")
    fix_tmp[f"word_run_{algo_choice}"] = fix_tmp.groupby(f"on_word_number_{algo_choice}")[
        f"word_runid_{algo_choice}"
    ].transform(lambda x: stats.rankdata(x, method="min"))

    if f"word_run_{algo_choice}" in tmp.columns:
        tmp = tmp.drop(columns=[f"word_run_{algo_choice}"])
    tmp = pd.merge(tmp, fix_tmp[["id", f"word_run_{algo_choice}"]], on="id")
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")

    # Same ranking for runs over each sentence.
    tmp["id"] = (
        tmp[f"on_sentence_num_{algo_choice}"].astype(str) + ":" + tmp[f"sentence_runid_{algo_choice}"].astype(str)
    )
    fix_tmp = tmp.copy().drop_duplicates(subset="id")
    fix_tmp[f"sentence_run_{algo_choice}"] = fix_tmp.groupby(f"on_sentence_num_{algo_choice}")["id"].transform(
        lambda x: stats.rankdata(x, method="min")
    )
    if f"sentence_run_{algo_choice}" in tmp.columns:
        tmp = tmp.drop(columns=[f"sentence_run_{algo_choice}"])
    tmp = pd.merge(tmp, fix_tmp[["id", f"sentence_run_{algo_choice}"]], on="id")
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")

    # Rank of each fixation inside its (word, run) group.
    tmp["id"] = tmp[f"on_word_number_{algo_choice}"].astype(str) + ":" + tmp[f"word_run_{algo_choice}"].astype(str)
    tmp[f"word_run_fix_{algo_choice}"] = tmp.groupby(["id"])["fixation_number"].rank("first").values
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")

    # Rank of each fixation inside its (sentence, run) group.
    tmp["id"] = tmp[f"on_sentence_num_{algo_choice}"].astype(str) + ":" + tmp[f"sentence_run_{algo_choice}"].astype(str)
    tmp[f"sentence_run_fix_{algo_choice}"] = tmp.groupby(["id"])["fixation_number"].rank("first").values
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")
    names = [
        "fixation_number",
        f"word_runid_{algo_choice}",
        f"sentence_runid_{algo_choice}",
        f"word_fix_{algo_choice}",
        f"sentence_fix_{algo_choice}",
        f"word_run_{algo_choice}",
        f"sentence_run_{algo_choice}",
        f"word_run_fix_{algo_choice}",
        f"sentence_run_fix_{algo_choice}",
    ]
    dffix = pd.merge(dffix, tmp[names], on="fixation_number", how="left")
    return dffix.copy()
| |
|
| |
|
def compute_landing_position(dffix, algo_choice):
    """Compute the word-centred landing position ("word_cland"): the landing
    letter's distance from the centre of the fixated word, in characters.
    Mutates and returns dffix."""
    word_length = dffix[f"on_word_{algo_choice}"].str.len()
    dffix[f"word_cland_{algo_choice}"] = dffix[f"word_land_{algo_choice}"] - (word_length + 1) / 2
    return dffix
| |
|
| |
|
def aggregate_words_firstrun(
    fix,
    algo_choice,
    measures_to_calculate=None,
):
    """Aggregate the fixations of each word's *first run* to one row per word.

    Parameters
    ----------
    fix : pd.DataFrame
        Fixation table that already carries run numbering
        (``word_run_<algo>``) plus the per-fixation measure columns.
    algo_choice : str
        Line-assignment algorithm name used as a column suffix.
    measures_to_calculate : list of str, optional
        First-run measures to compute. ``None`` (the default) selects all
        supported measures; a ``None`` sentinel replaces the original
        mutable-list default argument.

    Returns
    -------
    pd.DataFrame
        One row per word with a first run, sorted by trial and word number.
    """
    if measures_to_calculate is None:
        measures_to_calculate = [
            "firstrun_blink",
            "firstrun_skip",
            "firstrun_refix",
            "firstrun_reg_in",
            "firstrun_reg_out",
            "firstrun_dur",
            "firstrun_gopast",
            "firstrun_gopast_sel",
        ]
    # Only fixations belonging to the first run over their word.
    firstruntmp = fix.loc[fix[f"word_run_{algo_choice}"] == 1].copy()

    # One representative row per word (the run's first fixation); grouped
    # transforms below align back onto these rows via the shared index.
    firstrun = firstruntmp.drop_duplicates(subset=f"on_word_number_{algo_choice}", keep="first").copy()

    names = [
        "subject",
        "trial_id",
        "item",
        "condition",
        f"on_word_number_{algo_choice}",
        f"on_word_{algo_choice}",
        "fixation_number",
    ]
    firstrun = firstrun[names].sort_values(f"on_word_number_{algo_choice}")

    # Number of fixations in the first run of each word.
    firstrun[f"firstrun_nfix_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
        "fixation_number"
    ].transform(
        "count"
    )
    firstrun[f"firstrun_nfix_{algo_choice}"] = firstrun[f"firstrun_nfix_{algo_choice}"].fillna(0)
    if "firstrun_blink" in measures_to_calculate:
        # Any blink during the first run; 0 when the input has no blink info.
        if "blink" in firstruntmp:
            firstrun[f"firstrun_blink_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
                "blink"
            ].transform("max")
        else:
            firstrun[f"firstrun_blink_{algo_choice}"] = 0

    if "firstrun_skip" in measures_to_calculate:
        firstrun[f"firstrun_skip_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_firstskip_{algo_choice}"
        ].transform("max")
    if "firstrun_refix" in measures_to_calculate:
        firstrun[f"firstrun_refix_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_refix_{algo_choice}"
        ].transform("max")
    if "firstrun_reg_in" in measures_to_calculate:
        # NOTE(review): "reg_in" aggregates word_reg_out here (and vice versa
        # below) — kept exactly as in the original source; confirm intended.
        firstrun[f"firstrun_reg_in_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_out_{algo_choice}"
        ].transform("max")
    if "firstrun_reg_out" in measures_to_calculate:
        firstrun[f"firstrun_reg_out_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_in_{algo_choice}"
        ].transform("max")
    if "firstrun_dur" in measures_to_calculate:
        firstrun[f"firstrun_dur_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            "duration"
        ].transform("sum")
    firstrun = firstrun.sort_values(["trial_id", f"on_word_number_{algo_choice}"]).copy()

    return firstrun
| |
|
| |
|
def compute_gopast_word(fixations_dataframe, algo_choice):
    """Compute go-past (regression-path) durations per word.

    For each word j:
      - "gopast": summed duration of ALL fixations from the first fixation on
        word j up to (exclusive) the first fixation on any later word — i.e.
        including regressions back to earlier words in that window.
      - "selgopast" (selective go-past): same window, but only fixations that
        land on word j itself.

    NOTE(review): the loop runs to len(ias) - 1, so the last (highest) word
    never receives go-past values — presumably because no "later word" exists
    to close its window (np.min over an empty selection); confirm intended.
    Mutates and returns fixations_dataframe.
    """
    ias = np.unique(fixations_dataframe.loc[:, f"on_word_number_{algo_choice}"])

    for j in range(len(ias) - 1):
        # Window: fixation numbers >= first fixation on word ias[j] and
        # < first fixation on any word after ias[j]; NaN words excluded.
        fixations_dataframe.loc[
            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), f"gopast_{algo_choice}"
        ] = np.nansum(
            fixations_dataframe.loc[
                (
                    fixations_dataframe["fixation_number"]
                    >= np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), "fixation_number"
                        ]
                    )
                )
                & (
                    fixations_dataframe["fixation_number"]
                    < np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] > ias[j]), "fixation_number"
                        ]
                    )
                )
                & (~fixations_dataframe[f"on_word_number_{algo_choice}"].isna())
            ]["duration"]
        )

        # Same window, but restricted to fixations on word ias[j] itself.
        fixations_dataframe.loc[
            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), f"selgopast_{algo_choice}"
        ] = np.nansum(
            fixations_dataframe.loc[
                (
                    fixations_dataframe["fixation_number"]
                    >= np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), "fixation_number"
                        ]
                    )
                )
                & (
                    fixations_dataframe["fixation_number"]
                    < np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] > ias[j]), "fixation_number"
                        ]
                    )
                )
                & (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j])
                & (~fixations_dataframe[f"on_word_number_{algo_choice}"].isna())
            ]["duration"]
        )
    return fixations_dataframe
| |
|
| |
|
def aggregate_words(
    fix,
    word_item,
    algo_choice,
    measures_to_calculate=None,
):
    """Aggregate fixation-level measures to one row per stimulus word.

    Parameters
    ----------
    fix : pd.DataFrame
        Fixation table carrying the per-fixation columns for `algo_choice`
        ("on_word_number_...", "fixation_number", "duration", ...).
    word_item : pd.DataFrame
        One row per word of the stimulus, keyed by "word_number". Words
        without any fixation are kept via a right merge and flagged as
        skipped.
    algo_choice : str
        Line-assignment algorithm name used as a column suffix.
    measures_to_calculate : list of str, optional
        Word measures to compute; defaults to ["blink"]. A ``None`` sentinel
        replaces the original mutable-list default argument.

    Returns
    -------
    pd.DataFrame
        One row per word, sorted by "word_number".
    """
    if measures_to_calculate is None:
        measures_to_calculate = ["blink"]
    wordtmp = fix.copy()

    # One representative row per word; grouped transforms below align back
    # onto these rows via the shared index.
    word = wordtmp.drop_duplicates(subset=f"on_word_number_{algo_choice}", keep="first").copy()
    names = [
        f"on_sentence_num_{algo_choice}",
        f"on_word_number_{algo_choice}",
        f"on_word_{algo_choice}",
    ]
    word = word.loc[:, names].sort_values(by=f"on_word_number_{algo_choice}")

    # Adds the gopast_/selgopast_ columns used by the optional measures below.
    wordtmp = compute_gopast_word(wordtmp, algo_choice)

    if "blink" in measures_to_calculate:
        # Any blink on the word; 0 when the input has no blink information.
        if "blink" in wordtmp:
            word[f"blink_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")["blink"].transform("max")
        else:
            word[f"blink_{algo_choice}"] = 0
    if "nrun" in measures_to_calculate or "reread" in measures_to_calculate:
        word[f"nrun_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_run_{algo_choice}"
        ].transform("max")
    if "reread" in measures_to_calculate:
        word[f"reread_{algo_choice}"] = word[f"nrun_{algo_choice}"] > 1
    # Always computed: needed below for the skip flag; dropped at the end
    # when not explicitly requested.
    word[f"number_of_fixations_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
        "fixation_number"
    ].transform("count")
    if "refix" in measures_to_calculate:
        word[f"refix_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_refix_{algo_choice}"
        ].transform("max")
    if "reg_in" in measures_to_calculate:
        word[f"reg_in_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_in_{algo_choice}"
        ].transform("max")
    if "reg_out" in measures_to_calculate:
        word[f"reg_out_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_out_{algo_choice}"
        ].transform("max")
    if "total_fixation_duration" in measures_to_calculate:
        word[f"total_fixation_duration_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            "duration"
        ].transform("sum")
    if "gopast" in measures_to_calculate and f"gopast_{algo_choice}" in wordtmp.columns:
        word[f"gopast_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"gopast_{algo_choice}"
        ].transform("max")
        word[f"gopast_{algo_choice}"] = word[f"gopast_{algo_choice}"].fillna(0)

    if "gopast_sel" in measures_to_calculate and f"selgopast_{algo_choice}" in wordtmp.columns:
        word[f"gopast_sel_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"selgopast_{algo_choice}"
        ].transform("max")
        word[f"gopast_sel_{algo_choice}"] = word[f"gopast_sel_{algo_choice}"].fillna(0)

    word.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1, inplace=True)
    # Right merge keeps every stimulus word, fixated or not.
    word = pd.merge(
        word.reset_index(drop=True), word_item.reset_index(drop=True), on="word_number", how="right", validate="1:1"
    )
    word[f"number_of_fixations_{algo_choice}"] = word[f"number_of_fixations_{algo_choice}"].fillna(0)
    if "total_fixation_duration" in measures_to_calculate:
        word[f"total_fixation_duration_{algo_choice}"] = word[f"total_fixation_duration_{algo_choice}"].fillna(0)

    # A word that received no fixations at all was skipped.
    word[f"skip_{algo_choice}"] = 0
    if "blink" in measures_to_calculate:
        word.loc[word[f"blink_{algo_choice}"].isna(), f"skip_{algo_choice}"] = 1
    word.loc[word[f"number_of_fixations_{algo_choice}"] == 0, f"skip_{algo_choice}"] = 1
    word[f"skip_{algo_choice}"] = word[f"skip_{algo_choice}"].astype("boolean")

    if "number_of_fixations" not in measures_to_calculate:
        word = word.drop(columns=f"number_of_fixations_{algo_choice}")
    if "blink" in measures_to_calculate:
        word[f"blink_{algo_choice}"] = word[f"blink_{algo_choice}"].astype("boolean")

    word = word.sort_values(by=["word_number"])

    # Carry over trial identifiers when present in the fixation table.
    if "condition" in wordtmp.columns and "condition" not in word.columns:
        word.insert(loc=0, column="condition", value=wordtmp["condition"].iloc[0])
    if "item" in wordtmp.columns and "item" not in word.columns:
        word.insert(loc=0, column="item", value=wordtmp["item"].iloc[0])
    if "trial_id" in wordtmp.columns and "trial_id" not in word.columns:
        word.insert(loc=0, column="trial_id", value=wordtmp["trial_id"].iloc[0])
    if "subject" in wordtmp.columns and "subject" not in word.columns:
        word.insert(loc=0, column="subject", value=wordtmp["subject"].iloc[0])

    return word
| |
|
| |
|
| | def combine_words(fix, wordfirst, wordtmp, algo_choice, measures_to_calculate): |
| |
|
| | subject = wordtmp["subject"].values[0] |
| | trial_id = wordtmp["trial_id"].values[0] |
| | item = wordtmp["item"].values[0] |
| | condition = wordtmp["condition"].values[0] |
| | wordtmp = wordtmp.loc[ |
| | :, |
| | [ |
| | c |
| | for c in [ |
| | "word_number", |
| | "word", |
| | f"blink_{algo_choice}", |
| | f"skip_{algo_choice}", |
| | f"nrun_{algo_choice}", |
| | f"reread_{algo_choice}", |
| | f"number_of_fixations_{algo_choice}", |
| | f"refix_{algo_choice}", |
| | f"reg_in_{algo_choice}", |
| | f"reg_out_{algo_choice}", |
| | f"total_fixation_duration_{algo_choice}", |
| | f"gopast_{algo_choice}", |
| | f"gopast_sel_{algo_choice}", |
| | ] |
| | if c in wordtmp.columns |
| | ], |
| | ] |
| |
|
| | wordfirsttmp = wordfirst.loc[ |
| | :, |
| | [ |
| | c |
| | for c in [ |
| | f"on_word_number_{algo_choice}", |
| | f"firstrun_skip_{algo_choice}", |
| | f"firstrun_nfix_{algo_choice}", |
| | f"firstrun_refix_{algo_choice}", |
| | f"firstrun_reg_in_{algo_choice}", |
| | f"firstrun_reg_out_{algo_choice}", |
| | f"firstrun_dur_{algo_choice}", |
| | f"firstrun_gopast_{algo_choice}", |
| | f"firstrun_gopast_sel_{algo_choice}", |
| | ] |
| | if c in wordfirst.columns |
| | ], |
| | ] |
| |
|
| | fixtmp = fix[(fix[f"word_run_{algo_choice}"] == 1) & (fix[f"word_run_fix_{algo_choice}"] == 1)].copy() |
| | names = [ |
| | c |
| | for c in [ |
| | f"on_word_number_{algo_choice}", |
| | f"sac_in_{algo_choice}", |
| | f"sac_out_{algo_choice}", |
| | f"word_launch_{algo_choice}", |
| | f"word_land_{algo_choice}", |
| | f"word_cland_{algo_choice}", |
| | f"duration", |
| | ] |
| | if c in fixtmp.columns |
| | ] |
| | fixtmp = fixtmp[names].copy() |
| | fixtmp.rename( |
| | { |
| | f"sac_in_{algo_choice}": f"firstfix_sac_in_{algo_choice}", |
| | f"sac_out_{algo_choice}": f"firstfix_sac_out_{algo_choice}", |
| | f"word_launch_{algo_choice}": f"firstfix_launch_{algo_choice}", |
| | f"word_land_{algo_choice}": f"firstfix_land_{algo_choice}", |
| | f"word_cland_{algo_choice}": f"firstfix_cland_{algo_choice}", |
| | f"duration": f"firstfix_dur_{algo_choice}", |
| | }, |
| | axis=1, |
| | inplace=True, |
| | ) |
| | comb = pd.merge( |
| | pd.merge( |
| | wordtmp, |
| | wordfirsttmp.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1), |
| | on="word_number", |
| | how="left", |
| | ), |
| | fixtmp.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1), |
| | on="word_number", |
| | how="left", |
| | ) |
| |
|
| | dropcols = [ |
| | c |
| | for c in [ |
| | f"firstrun_skip_{algo_choice}", |
| | f"firstrun_refix_{algo_choice}", |
| | f"firstrun_reg_in_{algo_choice}", |
| | f"firstrun_reg_out_{algo_choice}", |
| | f"firstrun_dur_{algo_choice}", |
| | f"firstrun_gopast_{algo_choice}", |
| | f"firstrun_gopast_sel_{algo_choice}", |
| | f"firstfix_sac_in_{algo_choice}", |
| | f"firstfix_sac_out_{algo_choice}", |
| | f"firstfix_launch_{algo_choice}", |
| | f"firstfix_land_{algo_choice}", |
| | f"firstfix_cland_{algo_choice}", |
| | f"firstfix_dur_{algo_choice}", |
| | ] |
| | if ((c.replace(f"_{algo_choice}", "") not in measures_to_calculate) & (c in comb.columns)) |
| | ] |
| | comb = comb.drop(columns=dropcols).copy() |
| | comb.sort_values(by="word_number", inplace=True) |
| |
|
| | |
| | if f"skip_{algo_choice}" in comb.columns and f"firstrun_skip_{algo_choice}" in comb.columns: |
| | comb.loc[comb[f"skip_{algo_choice}"] == 1, f"firstrun_skip_{algo_choice}"] = 1 |
| |
|
| | |
| | if f"gopast_{algo_choice}" in comb.columns and "firstrun_gopast" in measures_to_calculate: |
| | comb[f"firstrun_gopast_{algo_choice}"] = comb[f"gopast_{algo_choice}"] |
| | if f"gopast_sel_{algo_choice}" in comb.columns and "firstrun_gopast_sel" in measures_to_calculate: |
| | comb[f"firstrun_gopast_sel_{algo_choice}"] = comb[f"gopast_sel_{algo_choice}"] |
| | if f"gopast_{algo_choice}" in comb.columns: |
| | comb.drop(columns=[f"gopast_{algo_choice}"], inplace=True) |
| |
|
| | if f"gopast_sel_{algo_choice}" in comb.columns: |
| | comb.drop(columns=[f"gopast_sel_{algo_choice}"], inplace=True) |
| |
|
| | if f"firstrun_nfix_{algo_choice}" in comb.columns and "singlefix" in measures_to_calculate: |
| | comb[f"singlefix_{algo_choice}"] = 0 |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_{algo_choice}"] = 1 |
| |
|
| | if f"firstfix_sac_in_{algo_choice}" in comb.columns and "singlefix_sac_in" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_sac_in_{algo_choice}"] = comb[ |
| | f"firstfix_sac_in_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_sac_out_{algo_choice}" in comb.columns and "singlefix_sac_out" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_sac_out_{algo_choice}"] = comb[ |
| | f"firstfix_sac_out_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_launch_{algo_choice}" in comb.columns and "singlefix_launch" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_launch_{algo_choice}"] = comb[ |
| | f"firstfix_launch_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_land_{algo_choice}" in comb.columns and "singlefix_land" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_land_{algo_choice}"] = comb[ |
| | f"firstfix_land_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_cland_{algo_choice}" in comb.columns and "singlefix_cland" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_cland_{algo_choice}"] = comb[ |
| | f"firstfix_cland_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_dur_{algo_choice}" in comb.columns and "singlefix_dur" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_dur_{algo_choice}"] = comb[ |
| | f"firstfix_dur_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if "condition" not in comb.columns: |
| | comb.insert(loc=0, column="condition", value=condition) |
| | if "item" not in comb.columns: |
| | comb.insert(loc=0, column="item", value=item) |
| | if "trial_id" not in comb.columns: |
| | comb.insert(loc=0, column="trial_id", value=trial_id) |
| | if "subject" not in comb.columns: |
| | comb.insert(loc=0, column="subject", value=subject) |
| | return comb.copy() |
| |
|
| |
|
def compute_sentence_measures(fix, stimmat, algo_choice, measures_to_calc, save_to_csv=False):
    """Compute sentence-level reading measures for one trial.

    Adapted from popEye's sentence-measure computation. Builds a smoothed
    sentence index, marks first-pass/forward fixations, then aggregates
    fixation counts/durations, regressions, go-past times, rereading and
    skipping per sentence.

    Parameters
    ----------
    fix : pd.DataFrame
        Fixation report for a single trial; must contain (among others)
        ``trial_id``, ``fixation_number``, ``duration`` and the
        algorithm-suffixed columns read below (e.g.
        ``on_sentence_num_{algo_choice}``).
    stimmat : pd.DataFrame
        Stimulus matrix; one row per word with ``in_sentence_number``.
    algo_choice : str
        Suffix identifying the line-assignment algorithm whose columns are used.
    measures_to_calc :
        Container of measure names (without the algo suffix) selecting which
        measures to compute/keep.
    save_to_csv : bool
        If True, write the result to ``RESULTS_FOLDER``.

    Returns
    -------
    pd.DataFrame
        One row per sentence with the requested measures (columns renamed to
        ``sentence_number``, ``sentence``, ``number_of_words``).
    """
    # One row per sentence from the stimulus matrix.
    sentitem = deepcopy(stimmat).drop_duplicates(
        subset="in_sentence_number", keep="first"
    )
    fixin = fix.copy().reset_index(drop=True)
    unique_trial_ids = fixin['trial_id'].unique()
    # Propagate the trial id to the sentence table only when it is unambiguous.
    if len(unique_trial_ids) == 1:
        sentitem.loc[:,'trial_id'] = unique_trial_ids[0]
    # Working copy of the sentence assignment that gets smoothed below.
    fixin["on_sentence_num2"] = fixin[f"on_sentence_num_{algo_choice}"].copy()

    # Smooth isolated outliers: a fixation whose sentence differs from its
    # predecessor is pulled back to the predecessor's sentence if the reader
    # returns to that sentence within the next one or two fixations.
    for j in range(1, len(fixin) - 1):
        if fixin.loc[j, "on_sentence_num2"] != fixin.loc[j - 1, "on_sentence_num2"]:
            if j + 1 in fixin.index and fixin.loc[j + 1, "on_sentence_num2"] == fixin.loc[j - 1, "on_sentence_num2"]:
                fixin.loc[j, "on_sentence_num2"] = fixin.loc[j - 1, "on_sentence_num2"]
            elif j + 2 in fixin.index and fixin.loc[j + 2, "on_sentence_num2"] == fixin.loc[j - 1, "on_sentence_num2"]:
                fixin.loc[j, "on_sentence_num2"] = fixin.loc[j - 1, "on_sentence_num2"]

    # String id of the (smoothed) sentence each fixation falls on.
    fixin["id"] = fixin.apply(lambda row: f"{row['on_sentence_num2']}", axis=1)

    # Per-fixation regression flags based on the smoothed sentence index.
    fixin[f"sent_reg_in2_{algo_choice}"] = 0
    fixin[f"sent_reg_out2_{algo_choice}"] = 0

    # Run counter: incremented each time a new sentence run starts.
    fixin[f"sent_runid2_{algo_choice}"] = 1

    # Seed the sequential scan with the first fixation.
    fixin.loc[0, "last"] = fixin.loc[0, "id"]
    fixin.loc[0, f"firstpass_{algo_choice}"] = 1
    mem = [fixin.loc[0, "on_sentence_num2"]]  # sentences visited so far
    wordmem = [fixin.loc[0, f"on_word_number_{algo_choice}"]]  # max word reached
    fixin.loc[0, f"forward_{algo_choice}"] = 1

    # Sequential pass: mark regressions, run ids, first-pass and forward status.
    for j in range(1, len(fixin)):
        # "last" holds the sentence id of the previous fixation.
        fixin.loc[j, "last"] = fixin.loc[j - 1, "id"]

        # Any change of sentence counts as a regression in (here) / out (there).
        if fixin.loc[j, "on_sentence_num2"] != fixin.loc[j - 1, "on_sentence_num2"]:
            fixin.loc[j, f"sent_reg_in2_{algo_choice}"] = 1
            fixin.loc[j - 1, f"sent_reg_out2_{algo_choice}"] = 1
            fixin.loc[j, f"sent_reg_in_from2_{algo_choice}"] = fixin.loc[j - 1, "on_sentence_num2"]
            fixin.loc[j - 1, f"sent_reg_out_to2_{algo_choice}"] = fixin.loc[j, "on_sentence_num2"]

        # A new run starts at the first fixation of a sentence change.
        if fixin.loc[j, f"sent_reg_in2_{algo_choice}"] == 1 and fixin.loc[j - 1, f"sent_reg_in2_{algo_choice}"] != 1:
            fixin.loc[j, f"sent_runid2_{algo_choice}"] = fixin.loc[j - 1, f"sent_runid2_{algo_choice}"] + 1
        else:
            fixin.loc[j, f"sent_runid2_{algo_choice}"] = fixin.loc[j - 1, f"sent_runid2_{algo_choice}"]

        # First-pass: reading the furthest sentence reached so far (or a new one).
        if fixin.loc[j, "on_sentence_num2"] >= fixin.loc[j - 1, "on_sentence_num2"]:
            if fixin.loc[j, "on_sentence_num2"] in mem:
                if fixin.loc[j, "on_sentence_num2"] == max(mem):
                    fixin.loc[j, f"firstpass_{algo_choice}"] = 1
                else:
                    fixin.loc[j, f"firstpass_{algo_choice}"] = 0
            else:
                mem.append(fixin.loc[j, "on_sentence_num2"])
                fixin.loc[j, f"firstpass_{algo_choice}"] = 1
        else:
            fixin.loc[j, f"firstpass_{algo_choice}"] = 0

        # Forward: fixation lands on a word beyond the furthest word seen.
        # NOTE(review): equality with max(wordmem) leaves the flag unset (NaN)
        # for refixations of the furthest word — presumably intentional; confirm.
        if fixin.loc[j, f"on_word_number_{algo_choice}"] > max(wordmem):
            wordmem.append(fixin.loc[j, f"on_word_number_{algo_choice}"])
            fixin.loc[j, f"forward_{algo_choice}"] = 1
        elif fixin.loc[j, f"on_word_number_{algo_choice}"] < max(wordmem):
            fixin.loc[j, f"forward_{algo_choice}"] = 0

    # Around a line change, force the next three fixations to count as forward.
    for i in range(len(fixin) - 3):
        if fixin.loc[i, f"line_change_{algo_choice}"] > 0:
            # NOTE(review): column name lacks the algo suffix — possibly meant
            # f"on_word_number_{algo_choice}"; as written it creates/overwrites
            # a bare "on_word_number" column. Confirm intent.
            fixin.loc[i, "on_word_number"] = 0
            fixin.loc[i + 1, f"forward_{algo_choice}"] = 1
            fixin.loc[i + 2, f"forward_{algo_choice}"] = 1
            fixin.loc[i + 3, f"forward_{algo_choice}"] = 1

    # After moving to a later sentence, the next two fixations count as forward.
    for i in range(1, len(fixin) - 3):
        if fixin.loc[i, "on_sentence_num2"] > fixin.loc[i - 1, "on_sentence_num2"]:
            fixin.loc[i + 1, f"forward_{algo_choice}"] = 1
            fixin.loc[i + 2, f"forward_{algo_choice}"] = 1

    # Unique id per sentence run: "<sentence>:<run id>".
    fixin["id2"] = fixin["id"] + ":" + fixin[f"sent_runid2_{algo_choice}"].astype(str)

    # Restore chronological order (robustness; should already hold).
    fixin = fixin.sort_values(["trial_id", "fixation_number"])

    # Sentence table: one row per sentence, seeded from its first fixation.
    sent = fixin.copy().drop_duplicates(subset="id", keep="first")
    names = [
        "id",
        "subject",
        "trial_id",
        "item",
        "condition",
        "on_sentence_num2",
        f"on_sentence_num_{algo_choice}",
        f"on_sentence_{algo_choice}",
        "num_words_in_sentence",
    ]
    sent = sent[names].reset_index(drop=True)

    sent[f"firstrun_skip_{algo_choice}"] = 0

    # A sentence was skipped on first pass if it is first entered only after a
    # later sentence has already been visited.
    mem = []
    for j in range(len(sent)):
        if not pd.isna(sent.loc[j, f"on_sentence_num_{algo_choice}"]):
            if len(mem) > 0 and sent.loc[j, f"on_sentence_num_{algo_choice}"] < max(mem) and not pd.isna(max(mem)):
                sent.loc[j, f"firstrun_skip_{algo_choice}"] = 1
        if (
            not pd.isna(sent.loc[j, f"on_sentence_num_{algo_choice}"])
            and sent.loc[j, f"on_sentence_num_{algo_choice}"] not in mem
        ):
            mem.append(sent.loc[j, f"on_sentence_num_{algo_choice}"])

    # --- Total counts/durations per sentence ---
    if "total_n_fixations" in measures_to_calc:
        tmp = fixin.groupby("id")["duration"].count().reset_index()
        tmp.columns = ["id", f"total_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"total_n_fixations_{algo_choice}": 0}, inplace=True)

    # Total duration is always computed (needed for the reading rate below).
    tmp = fixin.groupby("id")["duration"].sum().reset_index()
    tmp.columns = ["id", f"total_dur_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")
    sent.fillna({f"total_dur_{algo_choice}": 0}, inplace=True)

    # --- First-pass measures ---
    if "firstpass_n_fixations" in measures_to_calc:
        tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 1].groupby("id")["duration"].count().reset_index()
        tmp.columns = ["id", f"firstpass_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_n_fixations_{algo_choice}": 0}, inplace=True)

    if "firstpass_dur" in measures_to_calc:
        tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 1].groupby("id")["duration"].sum().reset_index()
        tmp.columns = ["id", f"firstpass_dur_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_dur_{algo_choice}": 0}, inplace=True)

    # First-pass split into forward-directed and rereading fixations.
    if "firstpass_forward_n_fixations" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 1)]
            .groupby("id")["duration"]
            .count()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_forward_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_forward_n_fixations_{algo_choice}": 0}, inplace=True)

    if "firstpass_forward_dur" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 1)]
            .groupby("id")["duration"]
            .sum()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_forward_dur_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_forward_dur_{algo_choice}": 0}, inplace=True)

    if "firstpass_reread_n_fixations" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 0)]
            .groupby("id")["duration"]
            .count()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_reread_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_reread_n_fixations_{algo_choice}": 0}, inplace=True)

    if "firstpass_reread_dur" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 0)]
            .groupby("id")["duration"]
            .sum()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_reread_dur_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_reread_dur_{algo_choice}": 0}, inplace=True)

    # --- Look-back / look-from measures, only if any non-first-pass fixations exist ---
    if sum(fixin[f"firstpass_{algo_choice}"] == 0) != 0:
        if "lookback_n_fixations" in measures_to_calc:
            tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 0].groupby("id")["duration"].count().reset_index()
            tmp.columns = ["id", f"lookback_n_fixations_{algo_choice}"]
            sent = pd.merge(sent, tmp, on="id", how="left")
            sent.fillna({f"lookback_n_fixations_{algo_choice}": 0}, inplace=True)

        if "lookback_dur" in measures_to_calc:
            tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 0].groupby("id")["duration"].sum().reset_index()
            tmp.columns = ["id", f"lookback_dur_{algo_choice}"]
            sent = pd.merge(sent, tmp, on="id", how="left")
            sent.fillna({f"lookback_dur_{algo_choice}": 0}, inplace=True)

        # Recompute the run id string (equivalent to the earlier "id2").
        fixin["id2"] = fixin.apply(lambda row: f"{row['id']}:{row[f'sent_runid2_{algo_choice}']}", axis=1)
        sent2 = fixin.drop_duplicates(subset="id2", keep="first")
        # Runs that are regressions into a sentence after first pass.
        sent3 = sent2[(sent2[f"firstpass_{algo_choice}"] == 0) & (~pd.isna(sent2[f"sent_reg_in_from2_{algo_choice}"]))]

        # Look-from: fixations in those regression runs, attributed to the
        # sentence the regression was launched from ("last").
        # NOTE(review): unlike lookfrom_dur below, this count is not guarded by
        # measures_to_calc — confirm whether that asymmetry is intended.
        tmp = fixin[fixin["id2"].isin(sent3["id2"])].groupby("id")["duration"].count().reset_index()
        tmp.columns = ["id", f"lookfrom_n_fixations_{algo_choice}"]
        tmp2 = pd.merge(tmp, sent3)
        tmp3 = tmp2.groupby("last")[f"lookfrom_n_fixations_{algo_choice}"].sum().reset_index()
        tmp3.columns = ["last", f"lookfrom_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp3, left_on="id", right_on="last", how="left")
        sent.fillna({f"lookfrom_n_fixations_{algo_choice}": 0}, inplace=True)

        if "lookfrom_dur" in measures_to_calc:
            tmp = fixin[fixin["id2"].isin(sent3["id2"])].groupby("id")["duration"].sum().reset_index()
            tmp.columns = ["id", f"lookfrom_dur_{algo_choice}"]
            tmp2 = pd.merge(tmp, sent3)
            tmp3 = tmp2.groupby("last")[f"lookfrom_dur_{algo_choice}"].sum().reset_index()
            tmp3.columns = ["last", f"lookfrom_dur_{algo_choice}"]
            sent = pd.merge(sent, tmp3, left_on="id", right_on="last", how="left")
            sent.fillna({f"lookfrom_dur_{algo_choice}": 0}, inplace=True)

    # --- First-run regression flags (restricted to each sentence's first run) ---
    firstruntmp = fixin[fixin[f"sentence_run_{algo_choice}"] == 1]

    if "firstrun_reg_in" in measures_to_calc:
        tmp = firstruntmp.groupby("id")[f"sent_reg_in2_{algo_choice}"].max().reset_index()
        tmp.columns = ["id", f"firstrun_reg_in_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstrun_reg_in_{algo_choice}": 0}, inplace=True)

    if "firstrun_reg_out" in measures_to_calc:
        tmp = firstruntmp.groupby("id")[f"sent_reg_out2_{algo_choice}"].max().reset_index()
        tmp.columns = ["id", f"firstrun_reg_out_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstrun_reg_out_{algo_choice}": 0}, inplace=True)

    # --- Go-past times (computed on the smoothed sentence index) ---
    gopasttmp = fixin.copy()
    gopasttmp[f"on_sentence_num_{algo_choice}"] = gopasttmp["on_sentence_num2"]
    tmp = compute_gopast_sentence(gopasttmp, algo_choice)
    names = ["id", f"gopast_{algo_choice}", f"selgopast_{algo_choice}"]
    tmp = tmp[names]
    tmp = tmp.drop_duplicates(subset="id", keep="first")
    tmp.columns = ["id", f"gopast_{algo_choice}", f"gopast_sel_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    # Number of runs per sentence (max run counter).
    tmp = fixin.groupby("id")[f"sentence_run_{algo_choice}"].max().reset_index()
    tmp.columns = ["id", f"nrun_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    # Reread: more than one run on the sentence.
    sent[f"reread_{algo_choice}"] = sent.apply(lambda row: 1 if row[f"nrun_{algo_choice}"] > 1 else 0, axis=1)

    # Any regression into the sentence.
    tmp = fixin.groupby("id")[f"sent_reg_in2_{algo_choice}"].max().reset_index()
    tmp.columns = ["id", f"reg_in_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    # Any regression out of the sentence.
    tmp = fixin.groupby("id")[f"sent_reg_out2_{algo_choice}"].max().reset_index()
    tmp.columns = ["id", f"reg_out_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    sent = sent.sort_values(by=f"on_sentence_num_{algo_choice}").reset_index(drop=True)

    # Reading rate in words per minute (total duration assumed in ms).
    sent[f"rate_{algo_choice}"] = round(60000 / (sent[f"total_dur_{algo_choice}"] / sent["num_words_in_sentence"]))

    # Join stimulus-side sentence information; never-fixated sentences appear
    # with NaN run counts and are flagged as skipped below.
    item = sentitem.copy()

    sent = pd.merge(
        sent,
        item.rename({"in_sentence_number": f"on_sentence_num_{algo_choice}"}, axis=1),
        on=f"on_sentence_num_{algo_choice}",
        how="left",
        suffixes=['','from_item']
    )
    sent[f"skip_{algo_choice}"] = 0
    sent.loc[pd.isna(sent[f"nrun_{algo_choice}"]), f"skip_{algo_choice}"] = 1

    # Keep only identifier columns plus the requested measures.
    names = [
        "subject",
        "trial_id",
        "item",
        "condition",
    ] + [
        c
        for c in [
            f"on_sentence_num_{algo_choice}",
            f"on_sentence_{algo_choice}",
            "num_words_in_sentence",
            f"skip_{algo_choice}",
            f"nrun_{algo_choice}",
            f"reread_{algo_choice}",
            f"reg_in_{algo_choice}",
            f"reg_out_{algo_choice}",
            f"total_n_fixations_{algo_choice}",
            f"total_dur_{algo_choice}",
            f"rate_{algo_choice}",
            f"gopast_{algo_choice}",
            f"gopast_sel_{algo_choice}",
            f"firstrun_skip_{algo_choice}",
            f"firstrun_reg_in_{algo_choice}",
            f"firstrun_reg_out_{algo_choice}",
            f"firstpass_n_fixations_{algo_choice}",
            f"firstpass_dur_{algo_choice}",
            f"firstpass_forward_n_fixations_{algo_choice}",
            f"firstpass_forward_dur_{algo_choice}",
            f"firstpass_reread_n_fixations_{algo_choice}",
            f"firstpass_reread_dur_{algo_choice}",
            f"lookback_n_fixations_{algo_choice}",
            f"lookback_dur_{algo_choice}",
            f"lookfrom_n_fixations_{algo_choice}",
            f"lookfrom_dur_{algo_choice}",
        ]
        if (c in sent.columns and c.replace(f"_{algo_choice}", "") in measures_to_calc)
    ]
    sent = sent[names].copy()
    sent.rename(
        {
            f"on_sentence_num_{algo_choice}": "sentence_number",
            f"on_sentence_{algo_choice}": "sentence",
            "num_words_in_sentence": "number_of_words",
        },
        axis=1,
        inplace=True,
    )

    if save_to_csv:
        subj = fix["subject"].iloc[0]
        trial_id = fix["trial_id"].iloc[0]
        sent.to_csv(RESULTS_FOLDER / f"{subj}_{trial_id}_{algo_choice}_sentence_measures.csv")
    return sent.copy()
| |
|
| |
|
def compute_gopast_sentence(fixin, algo_choice):
    """Add per-sentence go-past durations to the fixation frame, in place.

    For each sentence (in order of first appearance in
    ``on_sentence_num_{algo_choice}``):

    * ``gopast_{algo_choice}`` — summed duration of every fixation from the
      first fixation on that sentence until (exclusive) the first fixation on
      any later sentence (regressive fixations included).
    * ``selgopast_{algo_choice}`` — same window, but restricted to fixations
      on the sentence itself (selective go-past).

    Returns the mutated ``fixin`` DataFrame.
    """
    sent_col = f"on_sentence_num_{algo_choice}"
    gopast_col = f"gopast_{algo_choice}"
    sel_col = f"selgopast_{algo_choice}"

    fixin[gopast_col] = np.nan
    fixin[sel_col] = np.nan

    # Sentences in order of first appearance.
    sentence_ids = fixin[sent_col].unique()

    for sent_num in sentence_ids:
        on_this = fixin[sent_col] == sent_num
        first_fix = fixin.loc[on_this, "fixation_number"].min(skipna=True)

        # Window closes at the first fixation on any later sentence; the last
        # sentence in appearance order keeps its window open to the end.
        if sent_num != sentence_ids[-1]:
            beyond = fixin.loc[fixin[sent_col] > sent_num, "fixation_number"].min(skipna=True)
        else:
            beyond = float("inf")

        in_window = (
            (fixin["fixation_number"] >= first_fix)
            & (fixin["fixation_number"] < beyond)
            & fixin[sent_col].notna()
        )
        # All fixations in the window count toward go-past ...
        fixin.loc[on_this, gopast_col] = fixin.loc[in_window, "duration"].sum(skipna=True)
        # ... but only those on the sentence itself toward selective go-past.
        fixin.loc[on_this, sel_col] = fixin.loc[in_window & on_this, "duration"].sum(skipna=True)

    return fixin
| |
|
| |
|
def aggregate_trials(dffix_combined, wordcomb, all_trials_by_subj, algo_choices):
    """Aggregate fixation- and word-level measures to one row per trial.

    Parameters
    ----------
    dffix_combined : pd.DataFrame
        Fixations across all subjects/trials; must include
        ``subject_trialID``, ``subject``, ``trial_id``, ``item``,
        ``condition``, ``fixation_number``, ``duration`` and the
        algo-suffixed word columns used below.
    wordcomb : pd.DataFrame
        Word-level measures across trials (with ``word`` plus algo-suffixed
        measure columns).
    all_trials_by_subj : dict
        Nested mapping subject -> trial_id -> trial metadata dict; keys such
        as ``trial_start_time``, ``trial_end_time``, ``question_correct`` and
        the optional "Fixation Cleaning Stats" sub-dict are read from it.
    algo_choices : list[str]
        Line-assignment algorithm suffixes to aggregate for.

    Returns
    -------
    pd.DataFrame
        One row per ``subject_trialID`` with trial-level aggregates.
    """
    tmp = dffix_combined.copy()

    # One row per trial, seeded with identifier columns.
    trial = tmp.drop_duplicates(subset="subject_trialID", keep="first")
    names = ["subject_trialID", "subject", "trial_id", "item", "condition"]
    trial = trial[names].copy()

    # Pull trial metadata (timing, comprehension accuracy, blink-discard
    # counts) from the per-subject trial dicts; missing entries are skipped
    # with a warning rather than raising.
    for index, row in trial.iterrows():
        if row["subject"] not in all_trials_by_subj:
            print(f"Warning: Subject '{row['subject']}' not found in all_trials_by_subj. Skipping trial {row['trial_id']}.")
            continue
        if row["trial_id"] not in all_trials_by_subj[row["subject"]]:
            print(f"Warning: Trial '{row['trial_id']}' not found for subject '{row['subject']}'. Skipping.")
            continue
        selected_trial = all_trials_by_subj[row["subject"]][row["trial_id"]]
        info_keys = [
            k for k in selected_trial.keys() if k in ["trial_start_time", "trial_end_time", "question_correct"]
        ]

        if "Fixation Cleaning Stats" in selected_trial:
            if selected_trial["Fixation Cleaning Stats"].get("Discard fixation before or after blinks", False):
                trial.at[index, "blink"] = selected_trial["Fixation Cleaning Stats"].get(
                    "Number of discarded fixations due to blinks", 0
                )
        for key, value in selected_trial.items():
            if key in info_keys:
                trial.at[index, key] = value

    # Keep only trials that also appear in the word-level table.
    subdf = wordcomb.copy().loc[:, ["subject_trialID"]].drop_duplicates(subset=["subject_trialID"], keep="first")
    trial = pd.merge(trial, subdf, on="subject_trialID", how="left")
    # Word count per trial from the word-level table.
    for sub, subdf in wordcomb.groupby("subject"):
        for trialid, trialdf in subdf.groupby("trial_id"):
            trial.loc[((trial["subject"] == sub) & (trial["trial_id"] == trialid)), "number_of_words_in_trial"] = (
                trialdf["word"].count()
            )
    trial.sort_values(by="subject_trialID", inplace=True)

    # Blink count per trial; divided by 2 because each blink contributes two
    # flagged samples (onset and offset) — presumably; confirm upstream.
    # NOTE(review): if "blink" was already set from the cleaning stats above,
    # this merge produces blink_x/blink_y suffixed columns — confirm intended.
    if "blink" in tmp.columns:
        blink = tmp.groupby("subject_trialID")["blink"].sum() / 2
        blink = blink.round().reset_index()
        trial = pd.merge(trial, blink, on="subject_trialID", how="left")

    # NOTE(review): positional assignment via .values relies on the groupby
    # result ordering matching trial's sort order by subject_trialID.
    trial["nfix"] = tmp.groupby("subject_trialID")["fixation_number"].agg("count").values
    new_col_dfs = []
    new_col_dfs.append(tmp.groupby("subject_trialID")["duration"].agg("mean").reset_index(name="mean_fix_duration"))

    new_col_dfs.append(tmp.groupby("subject_trialID")["duration"].agg("sum").reset_index(name="total_fix_duration"))
    for algo_choice in algo_choices:
        # Number of word runs per trial.
        new_col_dfs.append(
            tmp.groupby("subject_trialID")[f"word_runid_{algo_choice}"]
            .agg("max")
            .reset_index(name=f"nrun_{algo_choice}")
        )
        # Incoming saccade length in character units: landing position plus
        # launch distance — assumes launch is measured from word start; confirm.
        tmp[f"saccade_length_{algo_choice}"] = tmp[f"word_land_{algo_choice}"] + tmp[f"word_launch_{algo_choice}"]
        new_col_dfs.append(
            tmp[(tmp[f"saccade_length_{algo_choice}"] >= 0) & tmp[f"saccade_length_{algo_choice}"].notna()]
            .groupby("subject_trialID")[f"saccade_length_{algo_choice}"]
            .agg("mean")
            .reset_index(name=f"saccade_length_{algo_choice}")
        )

        # Word-based aggregates: skipping, refixation, regression probabilities
        # (means over words) and first-pass / rereading times (sums over words).
        word = wordcomb.copy()
        if f"firstrun_skip_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"firstrun_skip_{algo_choice}"]
                .agg("mean")
                .reset_index(name=f"skip_{algo_choice}")
            )
        if f"refix_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"refix_{algo_choice}"]
                .agg("mean")
                .reset_index(name=f"refix_{algo_choice}")
            )
        if f"reg_in_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"reg_in_{algo_choice}"]
                .agg("mean")
                .reset_index(name=f"reg_{algo_choice}")
            )

        if f"firstrun_dur_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"firstrun_dur_{algo_choice}"]
                .agg("sum")
                .reset_index(name=f"firstpass_{algo_choice}")
            )

        # Rereading time = total fixation time minus first-run time, per word,
        # summed within trial.
        if f"total_fixation_duration_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                (word[f"total_fixation_duration_{algo_choice}"] - word[f"firstrun_dur_{algo_choice}"])
                .groupby(word["subject_trialID"])
                .agg("sum")
                .reset_index(name=f"rereading_{algo_choice}")
            )
    # Join all per-trial aggregate columns at once on subject_trialID.
    trial = pd.concat(
        [trial.set_index("subject_trialID")] + [df.set_index("subject_trialID") for df in new_col_dfs], axis=1
    ).reset_index()
    # Words per minute. NOTE(review): uses the loop-leaked algo_choice, so only
    # one reading_rate column is created (value is algo-independent); raises
    # NameError if algo_choices is empty — confirm acceptable.
    trial[f"reading_rate_{algo_choice}"] = (
        60000 / (trial["total_fix_duration"] / trial["number_of_words_in_trial"])
    ).round()

    return trial.copy()
| |
|
| |
|
def aggregate_subjects(trials, algo_choices):
    """Aggregate trial-level measures to one row per subject.

    Computes subject means of the base measures (``nfix``, ``blink``) and of
    the per-algorithm measures present in ``trials``, the count of correctly
    answered questions (``n_question_correct``), and the number of trials
    (``ntrial``).
    """
    # Subject means of the base trial measures that are actually present.
    present_base = [col for col in ["nfix", "blink"] if col in trials.columns]
    if present_base:
        subj_agg = trials.groupby("subject")[present_base].mean().round(3).reset_index()
    else:
        subj_agg = trials[["subject"]].drop_duplicates().reset_index(drop=True)

    # Comprehension accuracy: count correct answers per subject, tolerating
    # string-encoded booleans and missing values.
    if "question_correct" in trials.columns:
        qc = trials["question_correct"].copy()
        if qc.dtype == "object":
            # Normalize string/None encodings before the boolean cast.
            qc = qc.replace(
                {
                    "True": True,
                    "true": True,
                    "FALSE": False,
                    "False": False,
                    "false": False,
                    "TRUE": True,
                    "": pd.NA,
                    None: pd.NA,
                }
            )
        try:
            qc_bool = qc.astype("boolean")
        except (TypeError, ValueError):
            # Fall back to element-wise coercion for mixed/odd values.
            qc_bool = qc.apply(lambda v: bool(v) if pd.notna(v) else pd.NA).astype("boolean")
        question_counts = (
            qc_bool.astype("Int64")
            .groupby(trials["subject"])
            .sum(min_count=0)
            .fillna(0)
            .astype("Int64")
            .rename("n_question_correct")
            .reset_index()
        )
        subj_agg = subj_agg.merge(question_counts, on="subject", how="left")
        subj_agg["n_question_correct"] = subj_agg["n_question_correct"].astype("Int64")
    else:
        subj_agg["n_question_correct"] = pd.Series(0, index=subj_agg.index, dtype="Int64")

    # Number of trials per subject.
    subj_agg = subj_agg.merge(
        trials.groupby("subject")["trial_id"].count().reset_index(name="ntrial"), on="subject"
    )

    # Subject means of whichever per-algorithm measures exist in trials.
    for algo in algo_choices:
        candidates = [
            f"saccade_length_{algo}",
            f"reg_{algo}",
            f"mean_fix_duration_{algo}",
            f"total_fix_duration_{algo}",
            f"reading_rate_{algo}",
            f"refix_{algo}",
            f"nrun_{algo}",
            f"skip_{algo}",
        ]
        algo_cols = [c for c in candidates if c in trials.columns]
        if algo_cols:
            per_algo = trials.groupby("subject")[algo_cols].mean().round(3).reset_index()
            subj_agg = pd.merge(subj_agg, per_algo, how="left", on="subject")

    return subj_agg
| |
|