| | """ |
| | Mostly adapted from: https://github.com/sascha2schroeder/popEye |
| | """ |
| |
|
| | from copy import deepcopy |
| | import numpy as np |
| | import pandas as pd |
| | from icecream import ic |
| | from scipy import stats |
| | import pathlib as pl |
| |
|
# Default directory for analysis output files (used elsewhere in the package).
RESULTS_FOLDER = pl.Path("results")
| |
|
| |
|
def compute_velocity(xy, samp=1000):
    """Compute per-sample gaze velocity from a gaze-position frame.

    Interior samples use the 5-point central-difference filter of
    Engbert & Kliegl (2003): v[i] = samp/6 * (x[i+2] + x[i+1] - x[i-1] - x[i-2]).
    The two samples next to the edges fall back to a 2-point central
    difference; the first and last samples keep a velocity of 0.

    Parameters
    ----------
    xy : pd.DataFrame
        Gaze samples; column 0 must be "time" and columns 1-2 the x/y
        positions (the code indexes positions positionally via iloc[:, 1:3]).
    samp : int, optional
        Sampling rate in Hz (default 1000, the previously hard-coded value).

    Returns
    -------
    pd.DataFrame
        `xy` with "vx" and "vy" columns joined on "time".
    """
    N = xy.shape[0]
    v = pd.DataFrame(data=np.zeros((N, 3)), columns=["time", "vx", "vy"])
    v["time"] = xy["time"]

    # 5-point central difference for interior samples 2 .. N-3.
    v.iloc[2 : (N - 2), 1:3] = (
        samp
        / 6
        * (
            xy.iloc[4:N, 1:3].values
            + xy.iloc[3 : (N - 1), 1:3].values
            - xy.iloc[1 : (N - 3), 1:3].values
            - xy.iloc[0 : (N - 4), 1:3].values
        )
    )
    # 2-point central difference at the edges. Sample 1 uses neighbours 2 and 0;
    # symmetrically, sample N-2 uses neighbours N-1 and N-3 (the original code
    # read N-4 here — an off-by-one that inflated the last edge velocity).
    v.iloc[1, 1:3] = samp / 2 * (xy.iloc[2, 1:3].values - xy.iloc[0, 1:3].values)
    v.iloc[(N - 2), 1:3] = samp / 2 * (xy.iloc[N - 1, 1:3].values - xy.iloc[N - 3, 1:3].values)

    xy = pd.concat([xy.set_index("time"), v.set_index("time")], axis=1).reset_index()
    return xy
| |
|
| |
|
def event_long(events_df):
    """Clean an event table and annotate blink adjacency.

    Steps:
    1. Drop events with non-positive duration (stop - start <= 0).
    2. Merge each BLINK event with its predecessor: the BLINK row inherits
       every non-"msg" column from the preceding event, and the preceding
       event is deleted (the blink absorbs it). A BLINK at index 0 has no
       predecessor and is left untouched.
    3. Flag the events immediately before ("blink_after") and after
       ("blink_before") each blink, and derive a combined "blink" flag.

    Parameters
    ----------
    events_df : pd.DataFrame
        Must contain "start", "stop" and "msg" columns; rows with
        msg == "BLINK" mark blink events. Mutated in place during processing.

    Returns
    -------
    pd.DataFrame
        Filtered copy with "num", "blink_before", "blink_after" and
        "blink" columns added.
    """
    events_df["duration"] = events_df["stop"] - events_df["start"]
    events_df = events_df[events_df["duration"] > 0]
    events_df = events_df.drop(columns=["duration"])
    events_df.reset_index(drop=True, inplace=True)
    tmplong_cols = list(events_df.columns)
    tmplong_cols.remove("msg")
    events_df["del"] = 0
    for i in events_df.index:
        if events_df.loc[i, "msg"] == "BLINK":
            if i == 0:
                continue
            # BLINK inherits the predecessor's data; predecessor is removed.
            for col in tmplong_cols:
                events_df.loc[i, col] = events_df.loc[i - 1, col]
            events_df.loc[i - 1, "del"] = 1

    events_df = events_df[events_df["del"] == 0]
    events_df = events_df.drop(columns=["del"])
    events_df.reset_index(drop=True, inplace=True)
    events_df["num"] = range(len(events_df))

    events_df["blink_before"] = 0
    events_df["blink_after"] = 0

    for i in events_df.index:
        if events_df.loc[i, "msg"] == "BLINK":
            # Guard i > 0: the original wrote to .loc[i - 1] unconditionally,
            # which for a blink at index 0 silently created a phantom row
            # with index label -1.
            if i > 0:
                events_df.loc[i - 1, "blink_after"] = 1
            if i < len(events_df) - 1:
                events_df.loc[i + 1, "blink_before"] = 1

    events_df["blink"] = (events_df["blink_before"] == 1) | (events_df["blink_after"] == 1)
    return events_df.copy()
| |
|
| |
|
def compute_non_line_dependent_saccade_measures(saccade_df, trial_dict):
    """Add amplitude and angle measures to each saccade that do not depend
    on any line-assignment algorithm.

    Expects saccade_df to carry "start_time", "end_time", "xs", "xe" and
    "angle" columns, and trial_dict to provide "trial_id" and "gaze_df"
    (with "time", "x" and — unless calibration_method is "H3" — "y").

    NOTE(review): mutates trial_dict["gaze_df"] in place (may write NaNs
    into "x") as well as saccade_df, which is also returned.
    """
    saccade_df["trial_id"] = trial_dict["trial_id"]
    gaze_df = trial_dict["gaze_df"]
    for s in range(len(saccade_df)):
        is_directional_deviation = False
        a = saccade_df["start_time"][s]
        b = saccade_df["end_time"][s]

        # If no sample inside [a, b] has a truthy x value, blank the stretch.
        # NOTE(review): .loc[a:b] slices by index *label* while the mask is
        # built from the "time" column — these only agree when the index
        # equals the time values; verify against the caller.
        if not gaze_df["x"][[True if (a <= x <= b) else False for x in gaze_df["time"]]].any():
            gaze_df.loc[a:b, "x"] = np.nan

        bool_vec = (gaze_df["time"] >= a) & (gaze_df["time"] <= b)
        if (not gaze_df["x"][bool_vec].isna().any()) and bool_vec.any():
            # Horizontal (and, unless calibration is "H3", vertical) extent
            # of the saccade; the sign of dX/dY encodes direction: positive
            # when the minimum occurs before the maximum in time.
            minx = min(gaze_df.loc[bool_vec, "x"])
            maxx = max(gaze_df.loc[bool_vec, "x"])
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                miny = min(gaze_df.loc[bool_vec, "y"])
                maxy = max(gaze_df.loc[bool_vec, "y"])
            ix1 = gaze_df.loc[bool_vec, "x"].index[np.argmin(gaze_df.loc[bool_vec, "x"])]
            ix2 = gaze_df.loc[bool_vec, "x"].index[np.argmax(gaze_df.loc[bool_vec, "x"])]
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                iy1 = gaze_df.loc[bool_vec, "y"].index[np.argmin(gaze_df.loc[bool_vec, "y"])]
                iy2 = gaze_df.loc[bool_vec, "y"].index[np.argmax(gaze_df.loc[bool_vec, "y"])]
            saccade_df.loc[s, "dX"] = round(np.sign(ix2 - ix1) * (maxx - minx))
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                saccade_df.loc[s, "dY"] = round(np.sign(iy2 - iy1) * (maxy - miny))

        # Amplitude in pixels plus angle in radians and degrees; the "H3"
        # calibration apparently has no usable vertical signal, so the
        # amplitude measures are undefined there.
        if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
            saccade_df.loc[s, "amp_px"] = round(
                np.sqrt(saccade_df.loc[s, "dX"] ** 2 + saccade_df.loc[s, "dY"] ** 2)
            )
            saccade_df.loc[s, "amp_angle"] = round(np.arctan2(saccade_df.loc[s, "dY"], saccade_df.loc[s, "dX"]), 2)
            saccade_df.loc[s, "amp_angle_deg"] = round(
                np.arctan2(saccade_df.loc[s, "dY"], saccade_df.loc[s, "dX"]) * (180 / np.pi), 2
            )
        else:
            saccade_df.loc[s, "amp_px"] = np.nan
            saccade_df.loc[s, "amp_angle"] = np.nan
            saccade_df.loc[s, "amp_angle_deg"] = np.nan

        # Mostly-vertical (35°-145°) but rightward saccade not adjacent to a
        # blink counts as a directional deviation.
        if 35 <= abs(saccade_df.loc[s, "angle"]) <= 145:
            if saccade_df.loc[s, "xe"] - saccade_df.loc[s, "xs"] > 0 and not (
                "blink_before" in saccade_df.columns
                and (saccade_df.loc[s, "blink_before"] or saccade_df.loc[s, "blink_after"])
            ):
                is_directional_deviation = True

        saccade_df.loc[s, "is_directional_deviation"] = is_directional_deviation

    return saccade_df
| |
|
| |
|
def compute_saccade_measures(saccade_df, trial_dict, algo_choice):
    """Add line-dependent saccade measures for one line-assignment algorithm.

    Computes, per saccade: whether it changes line ("has_line_change"),
    whether it lands exactly one line down ("goes_to_next_line"), whether it
    is a return sweep, the letter distance travelled ("dist_let"), the peak
    velocity inside the saccade window ("peak_vel") and the directional-
    deviation flag. Column names are suffixed with the algorithm name
    (algo_choice is None -> no suffix).

    NOTE(review): mutates trial_dict["gaze_df"] in place (may write NaNs
    into "x"); returns a copy of the mutated saccade_df.
    """
    if algo_choice is not None:
        algo_str = f"_{algo_choice}"
    else:
        algo_str = ""
    gaze_df = trial_dict["gaze_df"]
    saccade_df.reset_index(drop=True, inplace=True)
    # Line change flags derived from start ("lines") / end ("linee") lines.
    saccade_df.loc[:, f"has_line_change{algo_str}"] = (
        saccade_df.loc[:, f"lines{algo_str}"] != saccade_df.loc[:, f"linee{algo_str}"]
    )
    saccade_df.loc[:, f"goes_to_next_line{algo_str}"] = saccade_df.loc[:, f"linee{algo_str}"] == (
        saccade_df.loc[:, f"lines{algo_str}"] + 1
    )
    saccade_df.loc[:, f"is_directional_deviation{algo_str}"] = False
    saccade_df.loc[:, f"is_return_sweep{algo_str}"] = False

    # The last saccade starting on a line that lands on the following line
    # is that line's return sweep.
    for sidx, subdf in saccade_df.groupby(f"lines{algo_str}"):
        if subdf.iloc[-1][f"goes_to_next_line{algo_str}"]:
            saccade_df.loc[subdf.index[-1], f"is_return_sweep{algo_str}"] = True

    for s in range(len(saccade_df)):
        is_directional_deviation = False
        a = saccade_df["start_time"][s]
        b = saccade_df["end_time"][s]

        # If no sample inside [a, b] has a truthy x value, blank the stretch.
        # NOTE(review): .loc[a:b] slices by index label while the mask is
        # built from the "time" column — these only agree when the index
        # equals the time values; verify against the caller.
        if not gaze_df["x"][[True if (a <= x <= b) else False for x in gaze_df["time"]]].any():
            gaze_df.loc[a:b, "x"] = np.nan

        # Letter distance: end letter minus start letter of the saccade.
        if saccade_df.loc[s, f"lete{algo_str}"] is None or saccade_df.loc[s, f"lets{algo_str}"] is None:
            ic(
                f"None found for compute_saccade_measures at index {s} for subj {trial_dict['subject']} and trial {trial_dict['trial_id']}"
            )
        else:
            saccade_df.loc[s, f"dist_let{algo_str}"] = (
                saccade_df.loc[s, f"lete{algo_str}"] - saccade_df.loc[s, f"lets{algo_str}"]
            )

        bool_vec = (gaze_df["time"] >= a) & (gaze_df["time"] <= b)
        if (not gaze_df["x"][bool_vec].isna().any()) and bool_vec.any():
            # Peak velocity over the saccade window; the "H3" calibration
            # apparently has no usable vy, so only vx contributes there.
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                vx = gaze_df.vx[bool_vec]
                vy = gaze_df.vy[bool_vec]
                if not vx.empty and not vy.empty:
                    saccade_df.loc[s, f"peak_vel{algo_str}"] = round(np.nanmax(np.sqrt(vx**2 + vy**2)))
            else:
                saccade_df.loc[s, f"peak_vel{algo_str}"] = round(np.nanmax(np.sqrt(gaze_df.vx[bool_vec] ** 2)))

        # Mostly-vertical (35°-145°) but rightward saccade not adjacent to a
        # blink counts as a directional deviation.
        if 35 <= abs(saccade_df.loc[s, f"angle{algo_str}"]) <= 145:
            if saccade_df.loc[s, "xe"] - saccade_df.loc[s, "xs"] > 0 and not (
                "blink_before" in saccade_df.columns
                and (saccade_df.loc[s, "blink_before"] or saccade_df.loc[s, "blink_after"])
            ):
                is_directional_deviation = True

        saccade_df.loc[s, f"is_directional_deviation{algo_str}"] = is_directional_deviation
    return saccade_df.copy()
| |
|
| |
|
def get_angle_and_eucl_dist(saccade_df, algo_choice=None):
    """Add displacement, Euclidean distance and angle columns per saccade.

    Writes "xe_minus_xs", "ye_minus_ys<suffix>", "eucledian_distance" and
    "angle<suffix>" (degrees, from np.arctan2); <suffix> is "_<algo_choice>"
    or empty when algo_choice is None. Mutates and returns saccade_df.
    """
    suffix = "" if algo_choice is None else f"_{algo_choice}"
    dx = saccade_df["xe"] - saccade_df["xs"]
    dy = saccade_df[f"ye{suffix}"] - saccade_df[f"ys{suffix}"]
    saccade_df["xe_minus_xs"] = dx
    saccade_df[f"ye_minus_ys{suffix}"] = dy
    saccade_df["eucledian_distance"] = np.sqrt(dx**2 + dy**2)
    saccade_df[f"angle{suffix}"] = np.arctan2(dy, dx) * (180 / np.pi)
    return saccade_df
| |
|
| |
|
def compute_saccade_length(dffix, stimulus_df, algo_choice):
    """Compute incoming ("sac_in") and outgoing ("sac_out") saccade lengths
    in letters for every fixation, for one line-assignment algorithm.

    Within a line the length is the plain letter-index difference. Across
    lines each fixation's letter index is first made line-relative by
    subtracting the first letter index of its line, looked up in
    `stimulus_df` (columns "assigned_line" and "letternum").
    Mutates and returns dffix.
    """
    line_col = f"line_num_{algo_choice}"
    letter_col = f"letternum_{algo_choice}"
    sac_in_col = f"sac_in_{algo_choice}"
    sac_out_col = f"sac_out_{algo_choice}"

    def line_start(line_value):
        # First (smallest) letter index on the given stimulus line.
        return np.min(stimulus_df[stimulus_df["assigned_line"] == line_value]["letternum"])

    # Incoming saccade: from fixation j-1 to fixation j.
    for j in dffix.index:
        if (
            j == 0
            or pd.isna(dffix.at[j, line_col])
            or pd.isna(dffix.at[j - 1, line_col])
            or dffix.at[j, letter_col] is None
            or dffix.at[j - 1, letter_col] is None
        ):
            continue
        prev_line = dffix.at[j - 1, line_col]
        cur_line = dffix.at[j, line_col]
        prev_letter = dffix.at[j - 1, letter_col]
        cur_letter = dffix.at[j, letter_col]
        if prev_line == cur_line:
            dffix.at[j, sac_in_col] = cur_letter - prev_letter
        elif prev_line < cur_line:
            # Forward line change: compare line-relative letter positions.
            dffix.at[j, sac_in_col] = (cur_letter - line_start(cur_line)) - (prev_letter - line_start(prev_line))
        else:
            # Backward line change (regression to an earlier line).
            dffix.at[j, sac_in_col] = (prev_letter - line_start(prev_line)) - (cur_letter - line_start(cur_line))

    # Outgoing saccade: from fixation j to fixation j+1.
    for j in range(len(dffix) - 1):
        if (
            pd.isna(dffix.at[j, line_col])
            or pd.isna(dffix.at[j + 1, line_col])
            or dffix.at[j + 1, letter_col] is None
            or dffix.at[j, letter_col] is None
        ):
            continue
        cur_line = dffix.at[j, line_col]
        next_line = dffix.at[j + 1, line_col]
        cur_letter = dffix.at[j, letter_col]
        next_letter = dffix.at[j + 1, letter_col]
        if next_line == cur_line:
            dffix.at[j, sac_out_col] = next_letter - cur_letter
        elif next_line > cur_line:
            dffix.at[j, sac_out_col] = (next_letter - line_start(next_line)) - (cur_letter - line_start(cur_line))
        else:
            dffix.at[j, sac_out_col] = (cur_letter - line_start(cur_line)) - (next_letter - line_start(next_line))

    return dffix
| |
|
| |
|
def compute_launch_distance(dffix, algo_choice):
    """Compute the launch distance ("word_launch") for each fixation.

    For a forward incoming saccade (sac_in >= 0) the launch distance is the
    incoming saccade length minus the current landing position; for a
    backward one it is the saccade length plus the previous fixation's
    landing position. The first row and rows with NaN sac_in are skipped.
    Mutates and returns dffix.
    """
    sac_col = f"sac_in_{algo_choice}"
    land_col = f"word_land_{algo_choice}"
    launch_col = f"word_launch_{algo_choice}"

    for idx in range(1, dffix.shape[0]):
        sac = dffix.loc[idx, sac_col]
        if pd.isna(sac):
            continue
        if sac >= 0:
            dffix.loc[idx, launch_col] = sac - dffix.loc[idx, land_col]
        else:
            dffix.loc[idx, launch_col] = sac + dffix.loc[idx - 1, land_col]

    return dffix
| |
|
| |
|
def compute_refixation(dffix, algo_choice):
    """Mark fixations that land on the same word/sentence as the previous one.

    Adds boolean "word_refix" and "sentence_refix" columns (suffixed with the
    algorithm name). Row 0 and rows whose word number (current or previous)
    is NaN keep the default False. Mutates and returns dffix.
    """
    word_col = f"on_word_number_{algo_choice}"
    sent_col = f"on_sentence_num_{algo_choice}"
    word_refix_col = f"word_refix_{algo_choice}"
    sent_refix_col = f"sentence_refix_{algo_choice}"

    dffix.loc[:, word_refix_col] = False
    dffix.loc[:, sent_refix_col] = False
    for idx in dffix.index:
        if idx == 0:
            continue
        if pd.isna(dffix.loc[idx, word_col]) or pd.isna(dffix.loc[idx - 1, word_col]):
            continue
        dffix.loc[idx, word_refix_col] = dffix.loc[idx, word_col] == dffix.loc[idx - 1, word_col]
        dffix.loc[idx, sent_refix_col] = dffix.loc[idx, sent_col] == dffix.loc[idx - 1, sent_col]
    return dffix
| |
|
| |
|
def compute_regression(dffix, algo_choice):
    """Flag regressions between words and between sentences.

    For every pair of consecutive fixations, a fixation landing on an
    earlier word (sentence) than its predecessor is marked as a regression
    "in", the predecessor as the matching regression "out", and the
    word/sentence numbers involved are recorded in the *_from/*_to columns.
    The new columns are outer-merged back onto dffix by "fixation_number".
    """
    tmp = dffix.copy()
    # Fresh RangeIndex so positional .iloc[j] and label .loc[j] coincide below.
    tmp.reset_index(drop=True, inplace=True)
    tmp.loc[:, f"word_reg_out_{algo_choice}"] = False
    tmp.loc[:, f"word_reg_in_{algo_choice}"] = False
    tmp.loc[:, f"word_reg_out_to_{algo_choice}"] = float("nan")
    tmp.loc[:, f"word_reg_in_from_{algo_choice}"] = float("nan")
    tmp.loc[:, f"sentence_reg_out_{algo_choice}"] = False
    tmp.loc[:, f"sentence_reg_in_{algo_choice}"] = False
    tmp.loc[:, f"sentence_reg_out_to_{algo_choice}"] = float("nan")
    tmp.loc[:, f"sentence_reg_in_from_{algo_choice}"] = float("nan")

    if len(tmp) > 1:
        for j in range(1, len(tmp)):
            # Skip pairs without a word assignment on either side.
            if pd.isnull(tmp.iloc[j][f"on_word_number_{algo_choice}"]) or pd.isnull(
                tmp.iloc[j - 1][f"on_word_number_{algo_choice}"]
            ):
                continue

            # Fixation j lands on an earlier word than j-1: word regression.
            if tmp.iloc[j][f"on_word_number_{algo_choice}"] < tmp.iloc[j - 1][f"on_word_number_{algo_choice}"]:
                tmp.loc[j, f"word_reg_in_{algo_choice}"] = True
                tmp.loc[j - 1, f"word_reg_out_{algo_choice}"] = True
                tmp.loc[j, f"word_reg_in_from_{algo_choice}"] = tmp.iloc[j - 1][f"on_word_number_{algo_choice}"]
                tmp.loc[j - 1, f"word_reg_out_to_{algo_choice}"] = tmp.iloc[j][f"on_word_number_{algo_choice}"]

            # Same logic on the sentence level (NaN comparisons are False).
            if tmp.iloc[j][f"on_sentence_num_{algo_choice}"] < tmp.iloc[j - 1][f"on_sentence_num_{algo_choice}"]:
                tmp.loc[j, f"sentence_reg_in_{algo_choice}"] = True
                tmp.loc[j - 1, f"sentence_reg_out_{algo_choice}"] = True
                tmp.loc[j, f"sentence_reg_in_from_{algo_choice}"] = tmp.iloc[j - 1][f"on_sentence_num_{algo_choice}"]
                tmp.loc[j - 1, f"sentence_reg_out_to_{algo_choice}"] = tmp.iloc[j][f"on_sentence_num_{algo_choice}"]

    # Only the columns created above that dffix does not already have.
    extra_cols = list(set(tmp.columns) - set(dffix.columns))
    cols_to_add = ["fixation_number"] + extra_cols

    # Outer merge keeps every fixation even if it received no flags.
    dffix = pd.merge(dffix, tmp[cols_to_add], on="fixation_number", how="outer")
    return dffix
| |
|
| |
|
def compute_firstskip(dffix, algo_choice):
    """Mark words/sentences that were skipped during first-pass reading.

    A word (sentence) is "first-skipped" when its number lies below the
    running maximum of numbers fixated so far while it was never fixated
    itself — i.e. it was jumped over and only reached later, if ever.
    Rows without a line assignment get NaN instead of 0/1.
    Mutates and returns dffix.
    """
    dffix[f"word_firstskip_{algo_choice}"] = 0
    word_mem = []  # word numbers fixated so far

    dffix[f"sentence_firstskip_{algo_choice}"] = 0
    sentence_mem = []  # sentence numbers fixated so far
    # Move the index into an "index" column so .loc[j] below is purely
    # positional; restored via set_index before returning.
    dffix.reset_index(inplace=True)
    for j in range(dffix.shape[0]):
        # Current word lies before the furthest word seen and was never
        # fixated itself -> skipped on the first pass. np.max(..., initial=0)
        # makes the comparison safe on the first iteration (empty memory).
        if (
            dffix.loc[j, f"on_word_number_{algo_choice}"] < np.max(word_mem, initial=0)
            and dffix.loc[j, f"on_word_number_{algo_choice}"] not in word_mem
        ):
            dffix.loc[j, f"word_firstskip_{algo_choice}"] = 1

        # Same logic on the sentence level.
        if (
            dffix.loc[j, f"on_sentence_num_{algo_choice}"] < np.max(sentence_mem, initial=0)
            and dffix.loc[j, f"on_sentence_num_{algo_choice}"] not in sentence_mem
        ):
            dffix.loc[j, f"sentence_firstskip_{algo_choice}"] = 1

        word_mem.append(dffix.loc[j, f"on_word_number_{algo_choice}"])
        sentence_mem.append(dffix.loc[j, f"on_sentence_num_{algo_choice}"])

    # Fixations without a line assignment get no skip verdict.
    dffix.loc[dffix[f"line_num_{algo_choice}"].isna(), f"word_firstskip_{algo_choice}"] = np.nan
    dffix.loc[dffix[f"line_num_{algo_choice}"].isna(), f"sentence_firstskip_{algo_choice}"] = np.nan
    dffix.set_index("index", inplace=True)
    return dffix
| |
|
| |
|
def compute_run(dffix, algo_choice):
    """Number reading "runs" (gaze passes) over words and sentences.

    A new run starts at a fixation flagged as a regression-in when the
    previous fixation was not. Derived columns (all suffixed with the
    algorithm name):
      - word_runid / sentence_runid: run counter at each fixation,
      - word_fix / sentence_fix: rank of the fixation within its word/sentence,
      - word_run / sentence_run: which pass over the word/sentence this is,
      - word_run_fix / sentence_run_fix: rank of the fixation inside that run.
    Results are merged back onto dffix by "fixation_number".
    """
    if "fixation_number" not in dffix.columns and "num" in dffix.columns:
        dffix["fixation_number"] = dffix["num"]
    tmp = dffix.copy()
    tmp.reset_index(inplace=True, drop=True)
    # Initialise run ids; word run ids only where a word was assigned.
    tmp.loc[~tmp[f"on_word_{algo_choice}"].isna(), f"word_runid_{algo_choice}"] = 0
    tmp[f"sentence_runid_{algo_choice}"] = 0

    # Increment the run id at the start of each regression-in episode.
    if len(tmp) > 1:
        for j in range(1, len(tmp)):

            # Word level: regression-in starts here but not at j-1.
            if tmp[f"word_reg_in_{algo_choice}"][j] == 1 and tmp[f"word_reg_in_{algo_choice}"][j - 1] != 1:
                tmp.loc[j, f"word_runid_{algo_choice}"] = tmp[f"word_runid_{algo_choice}"][j - 1] + 1
            else:
                tmp.loc[j, f"word_runid_{algo_choice}"] = tmp.loc[j - 1, f"word_runid_{algo_choice}"]

            # Sentence level: same rule.
            if tmp[f"sentence_reg_in_{algo_choice}"][j] == 1 and tmp[f"sentence_reg_in_{algo_choice}"][j - 1] != 1:
                tmp.loc[j, f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"][j - 1] + 1
            else:
                tmp.loc[j, f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"][j - 1]
    # NOTE(review): run ids are shifted down by one here, so the first run
    # carries id -1 — presumably compensated downstream; confirm intended.
    tmp[f"word_runid_{algo_choice}"] = tmp[f"word_runid_{algo_choice}"] - 1
    tmp[f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"] - 1
    # Rank of each fixation within its word (1 = first fixation on the word).
    tmp[f"word_fix_{algo_choice}"] = tmp.groupby(f"on_word_number_{algo_choice}")["fixation_number"].transform(
        lambda x: stats.rankdata(x, method="min")
    )
    # Rank of each fixation within its sentence.
    tmp[f"sentence_fix_{algo_choice}"] = tmp.groupby(f"on_sentence_num_{algo_choice}")["fixation_number"].transform(
        lambda x: stats.rankdata(x, method="min")
    )

    # Rank the distinct runs over each word: word_run = 1 for the first pass,
    # 2 for the second, etc. "id" keys a (word, runid) pair.
    tmp["id"] = tmp[f"on_word_number_{algo_choice}"].astype(str) + ":" + tmp[f"word_runid_{algo_choice}"].astype(str)
    fix_tmp = tmp.copy().drop_duplicates(subset="id")
    fix_tmp[f"word_run_{algo_choice}"] = fix_tmp.groupby(f"on_word_number_{algo_choice}")[
        f"word_runid_{algo_choice}"
    ].transform(lambda x: stats.rankdata(x, method="min"))

    if f"word_run_{algo_choice}" in tmp.columns:
        tmp = tmp.drop(columns=[f"word_run_{algo_choice}"])
    tmp = pd.merge(tmp, fix_tmp[["id", f"word_run_{algo_choice}"]], on="id")
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")

    # Same ranking for runs over each sentence.
    tmp["id"] = (
        tmp[f"on_sentence_num_{algo_choice}"].astype(str) + ":" + tmp[f"sentence_runid_{algo_choice}"].astype(str)
    )
    fix_tmp = tmp.copy().drop_duplicates(subset="id")
    fix_tmp[f"sentence_run_{algo_choice}"] = fix_tmp.groupby(f"on_sentence_num_{algo_choice}")["id"].transform(
        lambda x: stats.rankdata(x, method="min")
    )
    if f"sentence_run_{algo_choice}" in tmp.columns:
        tmp = tmp.drop(columns=[f"sentence_run_{algo_choice}"])
    tmp = pd.merge(tmp, fix_tmp[["id", f"sentence_run_{algo_choice}"]], on="id")
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")

    # Rank of each fixation inside its (word, run) group.
    tmp["id"] = tmp[f"on_word_number_{algo_choice}"].astype(str) + ":" + tmp[f"word_run_{algo_choice}"].astype(str)
    tmp[f"word_run_fix_{algo_choice}"] = tmp.groupby(["id"])["fixation_number"].rank("first").values
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")

    # Rank of each fixation inside its (sentence, run) group.
    tmp["id"] = tmp[f"on_sentence_num_{algo_choice}"].astype(str) + ":" + tmp[f"sentence_run_{algo_choice}"].astype(str)
    tmp[f"sentence_run_fix_{algo_choice}"] = tmp.groupby(["id"])["fixation_number"].rank("first").values
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")
    names = [
        "fixation_number",
        f"word_runid_{algo_choice}",
        f"sentence_runid_{algo_choice}",
        f"word_fix_{algo_choice}",
        f"sentence_fix_{algo_choice}",
        f"word_run_{algo_choice}",
        f"sentence_run_{algo_choice}",
        f"word_run_fix_{algo_choice}",
        f"sentence_run_fix_{algo_choice}",
    ]
    dffix = pd.merge(dffix, tmp[names], on="fixation_number", how="left")
    return dffix.copy()
| |
|
| |
|
def compute_landing_position(dffix, algo_choice):
    """Compute the word-centred landing position ("word_cland"): the landing
    letter's distance from the centre of the fixated word, in characters.
    Mutates and returns dffix."""
    word_length = dffix[f"on_word_{algo_choice}"].str.len()
    dffix[f"word_cland_{algo_choice}"] = dffix[f"word_land_{algo_choice}"] - (word_length + 1) / 2
    return dffix
| |
|
| |
|
def aggregate_words_firstrun(
    fix,
    algo_choice,
    measures_to_calculate=None,
):
    """Aggregate the fixations of each word's *first run* to one row per word.

    Parameters
    ----------
    fix : pd.DataFrame
        Fixation table that already carries run numbering
        (``word_run_<algo>``) plus the per-fixation measure columns.
    algo_choice : str
        Line-assignment algorithm name used as a column suffix.
    measures_to_calculate : list of str, optional
        First-run measures to compute. ``None`` (the default) selects all
        supported measures; a ``None`` sentinel replaces the original
        mutable-list default argument.

    Returns
    -------
    pd.DataFrame
        One row per word with a first run, sorted by trial and word number.
    """
    if measures_to_calculate is None:
        measures_to_calculate = [
            "firstrun_blink",
            "firstrun_skip",
            "firstrun_refix",
            "firstrun_reg_in",
            "firstrun_reg_out",
            "firstrun_dur",
            "firstrun_gopast",
            "firstrun_gopast_sel",
        ]
    # Only fixations belonging to the first run over their word.
    firstruntmp = fix.loc[fix[f"word_run_{algo_choice}"] == 1].copy()

    # One representative row per word (the run's first fixation); grouped
    # transforms below align back onto these rows via the shared index.
    firstrun = firstruntmp.drop_duplicates(subset=f"on_word_number_{algo_choice}", keep="first").copy()

    names = [
        "subject",
        "trial_id",
        "item",
        "condition",
        f"on_word_number_{algo_choice}",
        f"on_word_{algo_choice}",
        "fixation_number",
    ]
    firstrun = firstrun[names].sort_values(f"on_word_number_{algo_choice}")

    # Number of fixations in the first run of each word.
    firstrun[f"firstrun_nfix_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
        "fixation_number"
    ].transform(
        "count"
    )
    firstrun[f"firstrun_nfix_{algo_choice}"] = firstrun[f"firstrun_nfix_{algo_choice}"].fillna(0)
    if "firstrun_blink" in measures_to_calculate:
        # Any blink during the first run; 0 when the input has no blink info.
        if "blink" in firstruntmp:
            firstrun[f"firstrun_blink_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
                "blink"
            ].transform("max")
        else:
            firstrun[f"firstrun_blink_{algo_choice}"] = 0

    if "firstrun_skip" in measures_to_calculate:
        firstrun[f"firstrun_skip_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_firstskip_{algo_choice}"
        ].transform("max")
    if "firstrun_refix" in measures_to_calculate:
        firstrun[f"firstrun_refix_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_refix_{algo_choice}"
        ].transform("max")
    if "firstrun_reg_in" in measures_to_calculate:
        # NOTE(review): "reg_in" aggregates word_reg_out here (and vice versa
        # below) — kept exactly as in the original source; confirm intended.
        firstrun[f"firstrun_reg_in_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_out_{algo_choice}"
        ].transform("max")
    if "firstrun_reg_out" in measures_to_calculate:
        firstrun[f"firstrun_reg_out_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_in_{algo_choice}"
        ].transform("max")
    if "firstrun_dur" in measures_to_calculate:
        firstrun[f"firstrun_dur_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
            "duration"
        ].transform("sum")
    firstrun = firstrun.sort_values(["trial_id", f"on_word_number_{algo_choice}"]).copy()

    return firstrun
| |
|
| |
|
def compute_gopast_word(fixations_dataframe, algo_choice):
    """Compute go-past (regression-path) durations per word.

    For each word j:
      - "gopast": summed duration of ALL fixations from the first fixation on
        word j up to (exclusive) the first fixation on any later word — i.e.
        including regressions back to earlier words in that window.
      - "selgopast" (selective go-past): same window, but only fixations that
        land on word j itself.

    NOTE(review): the loop runs to len(ias) - 1, so the last (highest) word
    never receives go-past values — presumably because no "later word" exists
    to close its window (np.min over an empty selection); confirm intended.
    Mutates and returns fixations_dataframe.
    """
    ias = np.unique(fixations_dataframe.loc[:, f"on_word_number_{algo_choice}"])

    for j in range(len(ias) - 1):
        # Window: fixation numbers >= first fixation on word ias[j] and
        # < first fixation on any word after ias[j]; NaN words excluded.
        fixations_dataframe.loc[
            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), f"gopast_{algo_choice}"
        ] = np.nansum(
            fixations_dataframe.loc[
                (
                    fixations_dataframe["fixation_number"]
                    >= np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), "fixation_number"
                        ]
                    )
                )
                & (
                    fixations_dataframe["fixation_number"]
                    < np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] > ias[j]), "fixation_number"
                        ]
                    )
                )
                & (~fixations_dataframe[f"on_word_number_{algo_choice}"].isna())
            ]["duration"]
        )

        # Same window, but restricted to fixations on word ias[j] itself.
        fixations_dataframe.loc[
            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), f"selgopast_{algo_choice}"
        ] = np.nansum(
            fixations_dataframe.loc[
                (
                    fixations_dataframe["fixation_number"]
                    >= np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), "fixation_number"
                        ]
                    )
                )
                & (
                    fixations_dataframe["fixation_number"]
                    < np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] > ias[j]), "fixation_number"
                        ]
                    )
                )
                & (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j])
                & (~fixations_dataframe[f"on_word_number_{algo_choice}"].isna())
            ]["duration"]
        )
    return fixations_dataframe
| |
|
| |
|
def aggregate_words(
    fix,
    word_item,
    algo_choice,
    measures_to_calculate=None,
):
    """Aggregate fixation-level measures to one row per stimulus word.

    Parameters
    ----------
    fix : pd.DataFrame
        Fixation table carrying the per-fixation columns for `algo_choice`
        ("on_word_number_...", "fixation_number", "duration", ...).
    word_item : pd.DataFrame
        One row per word of the stimulus, keyed by "word_number". Words
        without any fixation are kept via a right merge and flagged as
        skipped.
    algo_choice : str
        Line-assignment algorithm name used as a column suffix.
    measures_to_calculate : list of str, optional
        Word measures to compute; defaults to ["blink"]. A ``None`` sentinel
        replaces the original mutable-list default argument.

    Returns
    -------
    pd.DataFrame
        One row per word, sorted by "word_number".
    """
    if measures_to_calculate is None:
        measures_to_calculate = ["blink"]
    wordtmp = fix.copy()

    # One representative row per word; grouped transforms below align back
    # onto these rows via the shared index.
    word = wordtmp.drop_duplicates(subset=f"on_word_number_{algo_choice}", keep="first").copy()
    names = [
        f"on_sentence_num_{algo_choice}",
        f"on_word_number_{algo_choice}",
        f"on_word_{algo_choice}",
    ]
    word = word.loc[:, names].sort_values(by=f"on_word_number_{algo_choice}")

    # Adds the gopast_/selgopast_ columns used by the optional measures below.
    wordtmp = compute_gopast_word(wordtmp, algo_choice)

    if "blink" in measures_to_calculate:
        # Any blink on the word; 0 when the input has no blink information.
        if "blink" in wordtmp:
            word[f"blink_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")["blink"].transform("max")
        else:
            word[f"blink_{algo_choice}"] = 0
    if "nrun" in measures_to_calculate or "reread" in measures_to_calculate:
        word[f"nrun_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_run_{algo_choice}"
        ].transform("max")
    if "reread" in measures_to_calculate:
        word[f"reread_{algo_choice}"] = word[f"nrun_{algo_choice}"] > 1
    # Always computed: needed below for the skip flag; dropped at the end
    # when not explicitly requested.
    word[f"number_of_fixations_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
        "fixation_number"
    ].transform("count")
    if "refix" in measures_to_calculate:
        word[f"refix_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_refix_{algo_choice}"
        ].transform("max")
    if "reg_in" in measures_to_calculate:
        word[f"reg_in_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_in_{algo_choice}"
        ].transform("max")
    if "reg_out" in measures_to_calculate:
        word[f"reg_out_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_out_{algo_choice}"
        ].transform("max")
    if "total_fixation_duration" in measures_to_calculate:
        word[f"total_fixation_duration_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            "duration"
        ].transform("sum")
    if "gopast" in measures_to_calculate and f"gopast_{algo_choice}" in wordtmp.columns:
        word[f"gopast_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"gopast_{algo_choice}"
        ].transform("max")
        word[f"gopast_{algo_choice}"] = word[f"gopast_{algo_choice}"].fillna(0)

    if "gopast_sel" in measures_to_calculate and f"selgopast_{algo_choice}" in wordtmp.columns:
        word[f"gopast_sel_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"selgopast_{algo_choice}"
        ].transform("max")
        word[f"gopast_sel_{algo_choice}"] = word[f"gopast_sel_{algo_choice}"].fillna(0)

    word.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1, inplace=True)
    # Right merge keeps every stimulus word, fixated or not.
    word = pd.merge(
        word.reset_index(drop=True), word_item.reset_index(drop=True), on="word_number", how="right", validate="1:1"
    )
    word[f"number_of_fixations_{algo_choice}"] = word[f"number_of_fixations_{algo_choice}"].fillna(0)
    if "total_fixation_duration" in measures_to_calculate:
        word[f"total_fixation_duration_{algo_choice}"] = word[f"total_fixation_duration_{algo_choice}"].fillna(0)

    # A word that received no fixations at all was skipped.
    word[f"skip_{algo_choice}"] = 0
    if "blink" in measures_to_calculate:
        word.loc[word[f"blink_{algo_choice}"].isna(), f"skip_{algo_choice}"] = 1
    word.loc[word[f"number_of_fixations_{algo_choice}"] == 0, f"skip_{algo_choice}"] = 1
    word[f"skip_{algo_choice}"] = word[f"skip_{algo_choice}"].astype("boolean")

    if "number_of_fixations" not in measures_to_calculate:
        word = word.drop(columns=f"number_of_fixations_{algo_choice}")
    if "blink" in measures_to_calculate:
        word[f"blink_{algo_choice}"] = word[f"blink_{algo_choice}"].astype("boolean")

    word = word.sort_values(by=["word_number"])

    # Carry over trial identifiers when present in the fixation table.
    if "condition" in wordtmp.columns and "condition" not in word.columns:
        word.insert(loc=0, column="condition", value=wordtmp["condition"].iloc[0])
    if "item" in wordtmp.columns and "item" not in word.columns:
        word.insert(loc=0, column="item", value=wordtmp["item"].iloc[0])
    if "trial_id" in wordtmp.columns and "trial_id" not in word.columns:
        word.insert(loc=0, column="trial_id", value=wordtmp["trial_id"].iloc[0])
    if "subject" in wordtmp.columns and "subject" not in word.columns:
        word.insert(loc=0, column="subject", value=wordtmp["subject"].iloc[0])

    return word
| |
|
| |
|
| | def combine_words(fix, wordfirst, wordtmp, algo_choice, measures_to_calculate): |
| |
|
| | subject = wordtmp["subject"].values[0] |
| | trial_id = wordtmp["trial_id"].values[0] |
| | item = wordtmp["item"].values[0] |
| | condition = wordtmp["condition"].values[0] |
| | wordtmp = wordtmp.loc[ |
| | :, |
| | [ |
| | c |
| | for c in [ |
| | "word_number", |
| | "word", |
| | f"blink_{algo_choice}", |
| | f"skip_{algo_choice}", |
| | f"nrun_{algo_choice}", |
| | f"reread_{algo_choice}", |
| | f"number_of_fixations_{algo_choice}", |
| | f"refix_{algo_choice}", |
| | f"reg_in_{algo_choice}", |
| | f"reg_out_{algo_choice}", |
| | f"total_fixation_duration_{algo_choice}", |
| | f"gopast_{algo_choice}", |
| | f"gopast_sel_{algo_choice}", |
| | ] |
| | if c in wordtmp.columns |
| | ], |
| | ] |
| |
|
| | wordfirsttmp = wordfirst.loc[ |
| | :, |
| | [ |
| | c |
| | for c in [ |
| | f"on_word_number_{algo_choice}", |
| | f"firstrun_skip_{algo_choice}", |
| | f"firstrun_nfix_{algo_choice}", |
| | f"firstrun_refix_{algo_choice}", |
| | f"firstrun_reg_in_{algo_choice}", |
| | f"firstrun_reg_out_{algo_choice}", |
| | f"firstrun_dur_{algo_choice}", |
| | f"firstrun_gopast_{algo_choice}", |
| | f"firstrun_gopast_sel_{algo_choice}", |
| | ] |
| | if c in wordfirst.columns |
| | ], |
| | ] |
| |
|
| | fixtmp = fix[(fix[f"word_run_{algo_choice}"] == 1) & (fix[f"word_run_fix_{algo_choice}"] == 1)].copy() |
| | names = [ |
| | c |
| | for c in [ |
| | f"on_word_number_{algo_choice}", |
| | f"sac_in_{algo_choice}", |
| | f"sac_out_{algo_choice}", |
| | f"word_launch_{algo_choice}", |
| | f"word_land_{algo_choice}", |
| | f"word_cland_{algo_choice}", |
| | f"duration", |
| | ] |
| | if c in fixtmp.columns |
| | ] |
| | fixtmp = fixtmp[names].copy() |
| | fixtmp.rename( |
| | { |
| | f"sac_in_{algo_choice}": f"firstfix_sac_in_{algo_choice}", |
| | f"sac_out_{algo_choice}": f"firstfix_sac_out_{algo_choice}", |
| | f"word_launch_{algo_choice}": f"firstfix_launch_{algo_choice}", |
| | f"word_land_{algo_choice}": f"firstfix_land_{algo_choice}", |
| | f"word_cland_{algo_choice}": f"firstfix_cland_{algo_choice}", |
| | f"duration": f"firstfix_dur_{algo_choice}", |
| | }, |
| | axis=1, |
| | inplace=True, |
| | ) |
| | comb = pd.merge( |
| | pd.merge( |
| | wordtmp, |
| | wordfirsttmp.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1), |
| | on="word_number", |
| | how="left", |
| | ), |
| | fixtmp.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1), |
| | on="word_number", |
| | how="left", |
| | ) |
| |
|
| | dropcols = [ |
| | c |
| | for c in [ |
| | f"firstrun_skip_{algo_choice}", |
| | f"firstrun_refix_{algo_choice}", |
| | f"firstrun_reg_in_{algo_choice}", |
| | f"firstrun_reg_out_{algo_choice}", |
| | f"firstrun_dur_{algo_choice}", |
| | f"firstrun_gopast_{algo_choice}", |
| | f"firstrun_gopast_sel_{algo_choice}", |
| | f"firstfix_sac_in_{algo_choice}", |
| | f"firstfix_sac_out_{algo_choice}", |
| | f"firstfix_launch_{algo_choice}", |
| | f"firstfix_land_{algo_choice}", |
| | f"firstfix_cland_{algo_choice}", |
| | f"firstfix_dur_{algo_choice}", |
| | ] |
| | if ((c.replace(f"_{algo_choice}", "") not in measures_to_calculate) & (c in comb.columns)) |
| | ] |
| | comb = comb.drop(columns=dropcols).copy() |
| | comb.sort_values(by="word_number", inplace=True) |
| |
|
| | |
| | if f"skip_{algo_choice}" in comb.columns and f"firstrun_skip_{algo_choice}" in comb.columns: |
| | comb.loc[comb[f"skip_{algo_choice}"] == 1, f"firstrun_skip_{algo_choice}"] = 1 |
| |
|
| | |
| | if f"gopast_{algo_choice}" in comb.columns and "firstrun_gopast" in measures_to_calculate: |
| | comb[f"firstrun_gopast_{algo_choice}"] = comb[f"gopast_{algo_choice}"] |
| | if f"gopast_sel_{algo_choice}" in comb.columns and "firstrun_gopast_sel" in measures_to_calculate: |
| | comb[f"firstrun_gopast_sel_{algo_choice}"] = comb[f"gopast_sel_{algo_choice}"] |
| | if f"gopast_{algo_choice}" in comb.columns: |
| | comb.drop(columns=[f"gopast_{algo_choice}"], inplace=True) |
| |
|
| | if f"gopast_sel_{algo_choice}" in comb.columns: |
| | comb.drop(columns=[f"gopast_sel_{algo_choice}"], inplace=True) |
| |
|
| | if f"firstrun_nfix_{algo_choice}" in comb.columns and "singlefix" in measures_to_calculate: |
| | comb[f"singlefix_{algo_choice}"] = 0 |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_{algo_choice}"] = 1 |
| |
|
| | if f"firstfix_sac_in_{algo_choice}" in comb.columns and "singlefix_sac_in" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_sac_in_{algo_choice}"] = comb[ |
| | f"firstfix_sac_in_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_sac_out_{algo_choice}" in comb.columns and "singlefix_sac_out" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_sac_out_{algo_choice}"] = comb[ |
| | f"firstfix_sac_out_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_launch_{algo_choice}" in comb.columns and "singlefix_launch" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_launch_{algo_choice}"] = comb[ |
| | f"firstfix_launch_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_land_{algo_choice}" in comb.columns and "singlefix_land" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_land_{algo_choice}"] = comb[ |
| | f"firstfix_land_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_cland_{algo_choice}" in comb.columns and "singlefix_cland" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_cland_{algo_choice}"] = comb[ |
| | f"firstfix_cland_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if f"firstfix_dur_{algo_choice}" in comb.columns and "singlefix_dur" in measures_to_calculate: |
| | comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_dur_{algo_choice}"] = comb[ |
| | f"firstfix_dur_{algo_choice}" |
| | ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)] |
| |
|
| | if "condition" not in comb.columns: |
| | comb.insert(loc=0, column="condition", value=condition) |
| | if "item" not in comb.columns: |
| | comb.insert(loc=0, column="item", value=item) |
| | if "trial_id" not in comb.columns: |
| | comb.insert(loc=0, column="trial_id", value=trial_id) |
| | if "subject" not in comb.columns: |
| | comb.insert(loc=0, column="subject", value=subject) |
| | return comb.copy() |
| |
|
| |
|
def compute_sentence_measures(fix, stimmat, algo_choice, measures_to_calc, save_to_csv=False):
    """Compute sentence-level reading measures for one trial.

    Adapted from popEye's sentence-measure computation. Builds a smoothed
    sentence index, marks first-pass/forward fixations, then aggregates
    fixation counts/durations, regressions, go-past times, rereading and
    skipping per sentence.

    Parameters
    ----------
    fix : pd.DataFrame
        Fixation report for a single trial; must contain (among others)
        ``trial_id``, ``fixation_number``, ``duration`` and the
        algorithm-suffixed columns read below (e.g.
        ``on_sentence_num_{algo_choice}``).
    stimmat : pd.DataFrame
        Stimulus matrix; one row per word with ``in_sentence_number``.
    algo_choice : str
        Suffix identifying the line-assignment algorithm whose columns are used.
    measures_to_calc :
        Container of measure names (without the algo suffix) selecting which
        measures to compute/keep.
    save_to_csv : bool
        If True, write the result to ``RESULTS_FOLDER``.

    Returns
    -------
    pd.DataFrame
        One row per sentence with the requested measures (columns renamed to
        ``sentence_number``, ``sentence``, ``number_of_words``).
    """
    # One row per sentence from the stimulus matrix.
    sentitem = deepcopy(stimmat).drop_duplicates(
        subset="in_sentence_number", keep="first"
    )
    fixin = fix.copy().reset_index(drop=True)
    unique_trial_ids = fixin['trial_id'].unique()
    # Propagate the trial id to the sentence table only when it is unambiguous.
    if len(unique_trial_ids) == 1:
        sentitem.loc[:,'trial_id'] = unique_trial_ids[0]
    # Working copy of the sentence assignment that gets smoothed below.
    fixin["on_sentence_num2"] = fixin[f"on_sentence_num_{algo_choice}"].copy()

    # Smooth isolated outliers: a fixation whose sentence differs from its
    # predecessor is pulled back to the predecessor's sentence if the reader
    # returns to that sentence within the next one or two fixations.
    for j in range(1, len(fixin) - 1):
        if fixin.loc[j, "on_sentence_num2"] != fixin.loc[j - 1, "on_sentence_num2"]:
            if j + 1 in fixin.index and fixin.loc[j + 1, "on_sentence_num2"] == fixin.loc[j - 1, "on_sentence_num2"]:
                fixin.loc[j, "on_sentence_num2"] = fixin.loc[j - 1, "on_sentence_num2"]
            elif j + 2 in fixin.index and fixin.loc[j + 2, "on_sentence_num2"] == fixin.loc[j - 1, "on_sentence_num2"]:
                fixin.loc[j, "on_sentence_num2"] = fixin.loc[j - 1, "on_sentence_num2"]

    # String id of the (smoothed) sentence each fixation falls on.
    fixin["id"] = fixin.apply(lambda row: f"{row['on_sentence_num2']}", axis=1)

    # Per-fixation regression flags based on the smoothed sentence index.
    fixin[f"sent_reg_in2_{algo_choice}"] = 0
    fixin[f"sent_reg_out2_{algo_choice}"] = 0

    # Run counter: incremented each time a new sentence run starts.
    fixin[f"sent_runid2_{algo_choice}"] = 1

    # Seed the sequential scan with the first fixation.
    fixin.loc[0, "last"] = fixin.loc[0, "id"]
    fixin.loc[0, f"firstpass_{algo_choice}"] = 1
    mem = [fixin.loc[0, "on_sentence_num2"]]  # sentences visited so far
    wordmem = [fixin.loc[0, f"on_word_number_{algo_choice}"]]  # max word reached
    fixin.loc[0, f"forward_{algo_choice}"] = 1

    # Sequential pass: mark regressions, run ids, first-pass and forward status.
    for j in range(1, len(fixin)):
        # "last" holds the sentence id of the previous fixation.
        fixin.loc[j, "last"] = fixin.loc[j - 1, "id"]

        # Any change of sentence counts as a regression in (here) / out (there).
        if fixin.loc[j, "on_sentence_num2"] != fixin.loc[j - 1, "on_sentence_num2"]:
            fixin.loc[j, f"sent_reg_in2_{algo_choice}"] = 1
            fixin.loc[j - 1, f"sent_reg_out2_{algo_choice}"] = 1
            fixin.loc[j, f"sent_reg_in_from2_{algo_choice}"] = fixin.loc[j - 1, "on_sentence_num2"]
            fixin.loc[j - 1, f"sent_reg_out_to2_{algo_choice}"] = fixin.loc[j, "on_sentence_num2"]

        # A new run starts at the first fixation of a sentence change.
        if fixin.loc[j, f"sent_reg_in2_{algo_choice}"] == 1 and fixin.loc[j - 1, f"sent_reg_in2_{algo_choice}"] != 1:
            fixin.loc[j, f"sent_runid2_{algo_choice}"] = fixin.loc[j - 1, f"sent_runid2_{algo_choice}"] + 1
        else:
            fixin.loc[j, f"sent_runid2_{algo_choice}"] = fixin.loc[j - 1, f"sent_runid2_{algo_choice}"]

        # First-pass: reading the furthest sentence reached so far (or a new one).
        if fixin.loc[j, "on_sentence_num2"] >= fixin.loc[j - 1, "on_sentence_num2"]:
            if fixin.loc[j, "on_sentence_num2"] in mem:
                if fixin.loc[j, "on_sentence_num2"] == max(mem):
                    fixin.loc[j, f"firstpass_{algo_choice}"] = 1
                else:
                    fixin.loc[j, f"firstpass_{algo_choice}"] = 0
            else:
                mem.append(fixin.loc[j, "on_sentence_num2"])
                fixin.loc[j, f"firstpass_{algo_choice}"] = 1
        else:
            fixin.loc[j, f"firstpass_{algo_choice}"] = 0

        # Forward: fixation lands on a word beyond the furthest word seen.
        # NOTE(review): equality with max(wordmem) leaves the flag unset (NaN)
        # for refixations of the furthest word — presumably intentional; confirm.
        if fixin.loc[j, f"on_word_number_{algo_choice}"] > max(wordmem):
            wordmem.append(fixin.loc[j, f"on_word_number_{algo_choice}"])
            fixin.loc[j, f"forward_{algo_choice}"] = 1
        elif fixin.loc[j, f"on_word_number_{algo_choice}"] < max(wordmem):
            fixin.loc[j, f"forward_{algo_choice}"] = 0

    # Around a line change, force the next three fixations to count as forward.
    for i in range(len(fixin) - 3):
        if fixin.loc[i, f"line_change_{algo_choice}"] > 0:
            # NOTE(review): column name lacks the algo suffix — possibly meant
            # f"on_word_number_{algo_choice}"; as written it creates/overwrites
            # a bare "on_word_number" column. Confirm intent.
            fixin.loc[i, "on_word_number"] = 0
            fixin.loc[i + 1, f"forward_{algo_choice}"] = 1
            fixin.loc[i + 2, f"forward_{algo_choice}"] = 1
            fixin.loc[i + 3, f"forward_{algo_choice}"] = 1

    # After moving to a later sentence, the next two fixations count as forward.
    for i in range(1, len(fixin) - 3):
        if fixin.loc[i, "on_sentence_num2"] > fixin.loc[i - 1, "on_sentence_num2"]:
            fixin.loc[i + 1, f"forward_{algo_choice}"] = 1
            fixin.loc[i + 2, f"forward_{algo_choice}"] = 1

    # Unique id per sentence run: "<sentence>:<run id>".
    fixin["id2"] = fixin["id"] + ":" + fixin[f"sent_runid2_{algo_choice}"].astype(str)

    # Restore chronological order (robustness; should already hold).
    fixin = fixin.sort_values(["trial_id", "fixation_number"])

    # Sentence table: one row per sentence, seeded from its first fixation.
    sent = fixin.copy().drop_duplicates(subset="id", keep="first")
    names = [
        "id",
        "subject",
        "trial_id",
        "item",
        "condition",
        "on_sentence_num2",
        f"on_sentence_num_{algo_choice}",
        f"on_sentence_{algo_choice}",
        "num_words_in_sentence",
    ]
    sent = sent[names].reset_index(drop=True)

    sent[f"firstrun_skip_{algo_choice}"] = 0

    # A sentence was skipped on first pass if it is first entered only after a
    # later sentence has already been visited.
    mem = []
    for j in range(len(sent)):
        if not pd.isna(sent.loc[j, f"on_sentence_num_{algo_choice}"]):
            if len(mem) > 0 and sent.loc[j, f"on_sentence_num_{algo_choice}"] < max(mem) and not pd.isna(max(mem)):
                sent.loc[j, f"firstrun_skip_{algo_choice}"] = 1
        if (
            not pd.isna(sent.loc[j, f"on_sentence_num_{algo_choice}"])
            and sent.loc[j, f"on_sentence_num_{algo_choice}"] not in mem
        ):
            mem.append(sent.loc[j, f"on_sentence_num_{algo_choice}"])

    # --- Total counts/durations per sentence ---
    if "total_n_fixations" in measures_to_calc:
        tmp = fixin.groupby("id")["duration"].count().reset_index()
        tmp.columns = ["id", f"total_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"total_n_fixations_{algo_choice}": 0}, inplace=True)

    # Total duration is always computed (needed for the reading rate below).
    tmp = fixin.groupby("id")["duration"].sum().reset_index()
    tmp.columns = ["id", f"total_dur_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")
    sent.fillna({f"total_dur_{algo_choice}": 0}, inplace=True)

    # --- First-pass measures ---
    if "firstpass_n_fixations" in measures_to_calc:
        tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 1].groupby("id")["duration"].count().reset_index()
        tmp.columns = ["id", f"firstpass_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_n_fixations_{algo_choice}": 0}, inplace=True)

    if "firstpass_dur" in measures_to_calc:
        tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 1].groupby("id")["duration"].sum().reset_index()
        tmp.columns = ["id", f"firstpass_dur_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_dur_{algo_choice}": 0}, inplace=True)

    # First-pass split into forward-directed and rereading fixations.
    if "firstpass_forward_n_fixations" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 1)]
            .groupby("id")["duration"]
            .count()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_forward_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_forward_n_fixations_{algo_choice}": 0}, inplace=True)

    if "firstpass_forward_dur" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 1)]
            .groupby("id")["duration"]
            .sum()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_forward_dur_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_forward_dur_{algo_choice}": 0}, inplace=True)

    if "firstpass_reread_n_fixations" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 0)]
            .groupby("id")["duration"]
            .count()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_reread_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_reread_n_fixations_{algo_choice}": 0}, inplace=True)

    if "firstpass_reread_dur" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 0)]
            .groupby("id")["duration"]
            .sum()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_reread_dur_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_reread_dur_{algo_choice}": 0}, inplace=True)

    # --- Look-back / look-from measures, only if any non-first-pass fixations exist ---
    if sum(fixin[f"firstpass_{algo_choice}"] == 0) != 0:
        if "lookback_n_fixations" in measures_to_calc:
            tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 0].groupby("id")["duration"].count().reset_index()
            tmp.columns = ["id", f"lookback_n_fixations_{algo_choice}"]
            sent = pd.merge(sent, tmp, on="id", how="left")
            sent.fillna({f"lookback_n_fixations_{algo_choice}": 0}, inplace=True)

        if "lookback_dur" in measures_to_calc:
            tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 0].groupby("id")["duration"].sum().reset_index()
            tmp.columns = ["id", f"lookback_dur_{algo_choice}"]
            sent = pd.merge(sent, tmp, on="id", how="left")
            sent.fillna({f"lookback_dur_{algo_choice}": 0}, inplace=True)

        # Recompute the run id string (equivalent to the earlier "id2").
        fixin["id2"] = fixin.apply(lambda row: f"{row['id']}:{row[f'sent_runid2_{algo_choice}']}", axis=1)
        sent2 = fixin.drop_duplicates(subset="id2", keep="first")
        # Runs that are regressions into a sentence after first pass.
        sent3 = sent2[(sent2[f"firstpass_{algo_choice}"] == 0) & (~pd.isna(sent2[f"sent_reg_in_from2_{algo_choice}"]))]

        # Look-from: fixations in those regression runs, attributed to the
        # sentence the regression was launched from ("last").
        # NOTE(review): unlike lookfrom_dur below, this count is not guarded by
        # measures_to_calc — confirm whether that asymmetry is intended.
        tmp = fixin[fixin["id2"].isin(sent3["id2"])].groupby("id")["duration"].count().reset_index()
        tmp.columns = ["id", f"lookfrom_n_fixations_{algo_choice}"]
        tmp2 = pd.merge(tmp, sent3)
        tmp3 = tmp2.groupby("last")[f"lookfrom_n_fixations_{algo_choice}"].sum().reset_index()
        tmp3.columns = ["last", f"lookfrom_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp3, left_on="id", right_on="last", how="left")
        sent.fillna({f"lookfrom_n_fixations_{algo_choice}": 0}, inplace=True)

        if "lookfrom_dur" in measures_to_calc:
            tmp = fixin[fixin["id2"].isin(sent3["id2"])].groupby("id")["duration"].sum().reset_index()
            tmp.columns = ["id", f"lookfrom_dur_{algo_choice}"]
            tmp2 = pd.merge(tmp, sent3)
            tmp3 = tmp2.groupby("last")[f"lookfrom_dur_{algo_choice}"].sum().reset_index()
            tmp3.columns = ["last", f"lookfrom_dur_{algo_choice}"]
            sent = pd.merge(sent, tmp3, left_on="id", right_on="last", how="left")
            sent.fillna({f"lookfrom_dur_{algo_choice}": 0}, inplace=True)

    # --- First-run regression flags (restricted to each sentence's first run) ---
    firstruntmp = fixin[fixin[f"sentence_run_{algo_choice}"] == 1]

    if "firstrun_reg_in" in measures_to_calc:
        tmp = firstruntmp.groupby("id")[f"sent_reg_in2_{algo_choice}"].max().reset_index()
        tmp.columns = ["id", f"firstrun_reg_in_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstrun_reg_in_{algo_choice}": 0}, inplace=True)

    if "firstrun_reg_out" in measures_to_calc:
        tmp = firstruntmp.groupby("id")[f"sent_reg_out2_{algo_choice}"].max().reset_index()
        tmp.columns = ["id", f"firstrun_reg_out_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstrun_reg_out_{algo_choice}": 0}, inplace=True)

    # --- Go-past times (computed on the smoothed sentence index) ---
    gopasttmp = fixin.copy()
    gopasttmp[f"on_sentence_num_{algo_choice}"] = gopasttmp["on_sentence_num2"]
    tmp = compute_gopast_sentence(gopasttmp, algo_choice)
    names = ["id", f"gopast_{algo_choice}", f"selgopast_{algo_choice}"]
    tmp = tmp[names]
    tmp = tmp.drop_duplicates(subset="id", keep="first")
    tmp.columns = ["id", f"gopast_{algo_choice}", f"gopast_sel_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    # Number of runs per sentence (max run counter).
    tmp = fixin.groupby("id")[f"sentence_run_{algo_choice}"].max().reset_index()
    tmp.columns = ["id", f"nrun_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    # Reread: more than one run on the sentence.
    sent[f"reread_{algo_choice}"] = sent.apply(lambda row: 1 if row[f"nrun_{algo_choice}"] > 1 else 0, axis=1)

    # Any regression into the sentence.
    tmp = fixin.groupby("id")[f"sent_reg_in2_{algo_choice}"].max().reset_index()
    tmp.columns = ["id", f"reg_in_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    # Any regression out of the sentence.
    tmp = fixin.groupby("id")[f"sent_reg_out2_{algo_choice}"].max().reset_index()
    tmp.columns = ["id", f"reg_out_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    sent = sent.sort_values(by=f"on_sentence_num_{algo_choice}").reset_index(drop=True)

    # Reading rate in words per minute (total duration assumed in ms).
    sent[f"rate_{algo_choice}"] = round(60000 / (sent[f"total_dur_{algo_choice}"] / sent["num_words_in_sentence"]))

    # Join stimulus-side sentence information; never-fixated sentences appear
    # with NaN run counts and are flagged as skipped below.
    item = sentitem.copy()

    sent = pd.merge(
        sent,
        item.rename({"in_sentence_number": f"on_sentence_num_{algo_choice}"}, axis=1),
        on=f"on_sentence_num_{algo_choice}",
        how="left",
        suffixes=['','from_item']
    )
    sent[f"skip_{algo_choice}"] = 0
    sent.loc[pd.isna(sent[f"nrun_{algo_choice}"]), f"skip_{algo_choice}"] = 1

    # Keep only identifier columns plus the requested measures.
    names = [
        "subject",
        "trial_id",
        "item",
        "condition",
    ] + [
        c
        for c in [
            f"on_sentence_num_{algo_choice}",
            f"on_sentence_{algo_choice}",
            "num_words_in_sentence",
            f"skip_{algo_choice}",
            f"nrun_{algo_choice}",
            f"reread_{algo_choice}",
            f"reg_in_{algo_choice}",
            f"reg_out_{algo_choice}",
            f"total_n_fixations_{algo_choice}",
            f"total_dur_{algo_choice}",
            f"rate_{algo_choice}",
            f"gopast_{algo_choice}",
            f"gopast_sel_{algo_choice}",
            f"firstrun_skip_{algo_choice}",
            f"firstrun_reg_in_{algo_choice}",
            f"firstrun_reg_out_{algo_choice}",
            f"firstpass_n_fixations_{algo_choice}",
            f"firstpass_dur_{algo_choice}",
            f"firstpass_forward_n_fixations_{algo_choice}",
            f"firstpass_forward_dur_{algo_choice}",
            f"firstpass_reread_n_fixations_{algo_choice}",
            f"firstpass_reread_dur_{algo_choice}",
            f"lookback_n_fixations_{algo_choice}",
            f"lookback_dur_{algo_choice}",
            f"lookfrom_n_fixations_{algo_choice}",
            f"lookfrom_dur_{algo_choice}",
        ]
        if (c in sent.columns and c.replace(f"_{algo_choice}", "") in measures_to_calc)
    ]
    sent = sent[names].copy()
    sent.rename(
        {
            f"on_sentence_num_{algo_choice}": "sentence_number",
            f"on_sentence_{algo_choice}": "sentence",
            "num_words_in_sentence": "number_of_words",
        },
        axis=1,
        inplace=True,
    )

    if save_to_csv:
        subj = fix["subject"].iloc[0]
        trial_id = fix["trial_id"].iloc[0]
        sent.to_csv(RESULTS_FOLDER / f"{subj}_{trial_id}_{algo_choice}_sentence_measures.csv")
    return sent.copy()
| |
|
| |
|
def compute_gopast_sentence(fixin, algo_choice):
    """Add per-sentence go-past durations to the fixation frame, in place.

    For each sentence (in order of first appearance in
    ``on_sentence_num_{algo_choice}``):

    * ``gopast_{algo_choice}`` — summed duration of every fixation from the
      first fixation on that sentence until (exclusive) the first fixation on
      any later sentence (regressive fixations included).
    * ``selgopast_{algo_choice}`` — same window, but restricted to fixations
      on the sentence itself (selective go-past).

    Returns the mutated ``fixin`` DataFrame.
    """
    sent_col = f"on_sentence_num_{algo_choice}"
    gopast_col = f"gopast_{algo_choice}"
    sel_col = f"selgopast_{algo_choice}"

    fixin[gopast_col] = np.nan
    fixin[sel_col] = np.nan

    # Sentences in order of first appearance.
    sentence_ids = fixin[sent_col].unique()

    for sent_num in sentence_ids:
        on_this = fixin[sent_col] == sent_num
        first_fix = fixin.loc[on_this, "fixation_number"].min(skipna=True)

        # Window closes at the first fixation on any later sentence; the last
        # sentence in appearance order keeps its window open to the end.
        if sent_num != sentence_ids[-1]:
            beyond = fixin.loc[fixin[sent_col] > sent_num, "fixation_number"].min(skipna=True)
        else:
            beyond = float("inf")

        in_window = (
            (fixin["fixation_number"] >= first_fix)
            & (fixin["fixation_number"] < beyond)
            & fixin[sent_col].notna()
        )
        # All fixations in the window count toward go-past ...
        fixin.loc[on_this, gopast_col] = fixin.loc[in_window, "duration"].sum(skipna=True)
        # ... but only those on the sentence itself toward selective go-past.
        fixin.loc[on_this, sel_col] = fixin.loc[in_window & on_this, "duration"].sum(skipna=True)

    return fixin
| |
|
| |
|
def aggregate_trials(dffix_combined, wordcomb, all_trials_by_subj, algo_choices):
    """Aggregate fixation- and word-level measures to one row per trial.

    Parameters
    ----------
    dffix_combined : pd.DataFrame
        Fixations across all subjects/trials; must include
        ``subject_trialID``, ``subject``, ``trial_id``, ``item``,
        ``condition``, ``fixation_number``, ``duration`` and the
        algo-suffixed word columns used below.
    wordcomb : pd.DataFrame
        Word-level measures across trials (with ``word`` plus algo-suffixed
        measure columns).
    all_trials_by_subj : dict
        Nested mapping subject -> trial_id -> trial metadata dict; keys such
        as ``trial_start_time``, ``trial_end_time``, ``question_correct`` and
        the optional "Fixation Cleaning Stats" sub-dict are read from it.
    algo_choices : list[str]
        Line-assignment algorithm suffixes to aggregate for.

    Returns
    -------
    pd.DataFrame
        One row per ``subject_trialID`` with trial-level aggregates.
    """
    tmp = dffix_combined.copy()

    # One row per trial, seeded with identifier columns.
    trial = tmp.drop_duplicates(subset="subject_trialID", keep="first")
    names = ["subject_trialID", "subject", "trial_id", "item", "condition"]
    trial = trial[names].copy()

    # Pull trial metadata (timing, comprehension accuracy, blink-discard
    # counts) from the per-subject trial dicts; missing entries are skipped
    # with a warning rather than raising.
    for index, row in trial.iterrows():
        if row["subject"] not in all_trials_by_subj:
            print(f"Warning: Subject '{row['subject']}' not found in all_trials_by_subj. Skipping trial {row['trial_id']}.")
            continue
        if row["trial_id"] not in all_trials_by_subj[row["subject"]]:
            print(f"Warning: Trial '{row['trial_id']}' not found for subject '{row['subject']}'. Skipping.")
            continue
        selected_trial = all_trials_by_subj[row["subject"]][row["trial_id"]]
        info_keys = [
            k for k in selected_trial.keys() if k in ["trial_start_time", "trial_end_time", "question_correct"]
        ]

        if "Fixation Cleaning Stats" in selected_trial:
            if selected_trial["Fixation Cleaning Stats"].get("Discard fixation before or after blinks", False):
                trial.at[index, "blink"] = selected_trial["Fixation Cleaning Stats"].get(
                    "Number of discarded fixations due to blinks", 0
                )
        for key, value in selected_trial.items():
            if key in info_keys:
                trial.at[index, key] = value

    # Keep only trials that also appear in the word-level table.
    subdf = wordcomb.copy().loc[:, ["subject_trialID"]].drop_duplicates(subset=["subject_trialID"], keep="first")
    trial = pd.merge(trial, subdf, on="subject_trialID", how="left")
    # Word count per trial from the word-level table.
    for sub, subdf in wordcomb.groupby("subject"):
        for trialid, trialdf in subdf.groupby("trial_id"):
            trial.loc[((trial["subject"] == sub) & (trial["trial_id"] == trialid)), "number_of_words_in_trial"] = (
                trialdf["word"].count()
            )
    trial.sort_values(by="subject_trialID", inplace=True)

    # Blink count per trial; divided by 2 because each blink contributes two
    # flagged samples (onset and offset) — presumably; confirm upstream.
    # NOTE(review): if "blink" was already set from the cleaning stats above,
    # this merge produces blink_x/blink_y suffixed columns — confirm intended.
    if "blink" in tmp.columns:
        blink = tmp.groupby("subject_trialID")["blink"].sum() / 2
        blink = blink.round().reset_index()
        trial = pd.merge(trial, blink, on="subject_trialID", how="left")

    # NOTE(review): positional assignment via .values relies on the groupby
    # result ordering matching trial's sort order by subject_trialID.
    trial["nfix"] = tmp.groupby("subject_trialID")["fixation_number"].agg("count").values
    new_col_dfs = []
    new_col_dfs.append(tmp.groupby("subject_trialID")["duration"].agg("mean").reset_index(name="mean_fix_duration"))

    new_col_dfs.append(tmp.groupby("subject_trialID")["duration"].agg("sum").reset_index(name="total_fix_duration"))
    for algo_choice in algo_choices:
        # Number of word runs per trial.
        new_col_dfs.append(
            tmp.groupby("subject_trialID")[f"word_runid_{algo_choice}"]
            .agg("max")
            .reset_index(name=f"nrun_{algo_choice}")
        )
        # Incoming saccade length in character units: landing position plus
        # launch distance — assumes launch is measured from word start; confirm.
        tmp[f"saccade_length_{algo_choice}"] = tmp[f"word_land_{algo_choice}"] + tmp[f"word_launch_{algo_choice}"]
        new_col_dfs.append(
            tmp[(tmp[f"saccade_length_{algo_choice}"] >= 0) & tmp[f"saccade_length_{algo_choice}"].notna()]
            .groupby("subject_trialID")[f"saccade_length_{algo_choice}"]
            .agg("mean")
            .reset_index(name=f"saccade_length_{algo_choice}")
        )

        # Word-based aggregates: skipping, refixation, regression probabilities
        # (means over words) and first-pass / rereading times (sums over words).
        word = wordcomb.copy()
        if f"firstrun_skip_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"firstrun_skip_{algo_choice}"]
                .agg("mean")
                .reset_index(name=f"skip_{algo_choice}")
            )
        if f"refix_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"refix_{algo_choice}"]
                .agg("mean")
                .reset_index(name=f"refix_{algo_choice}")
            )
        if f"reg_in_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"reg_in_{algo_choice}"]
                .agg("mean")
                .reset_index(name=f"reg_{algo_choice}")
            )

        if f"firstrun_dur_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"firstrun_dur_{algo_choice}"]
                .agg("sum")
                .reset_index(name=f"firstpass_{algo_choice}")
            )

        # Rereading time = total fixation time minus first-run time, per word,
        # summed within trial.
        if f"total_fixation_duration_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                (word[f"total_fixation_duration_{algo_choice}"] - word[f"firstrun_dur_{algo_choice}"])
                .groupby(word["subject_trialID"])
                .agg("sum")
                .reset_index(name=f"rereading_{algo_choice}")
            )
    # Join all per-trial aggregate columns at once on subject_trialID.
    trial = pd.concat(
        [trial.set_index("subject_trialID")] + [df.set_index("subject_trialID") for df in new_col_dfs], axis=1
    ).reset_index()
    # Words per minute. NOTE(review): uses the loop-leaked algo_choice, so only
    # one reading_rate column is created (value is algo-independent); raises
    # NameError if algo_choices is empty — confirm acceptable.
    trial[f"reading_rate_{algo_choice}"] = (
        60000 / (trial["total_fix_duration"] / trial["number_of_words_in_trial"])
    ).round()

    return trial.copy()
| |
|
| |
|
def aggregate_subjects(trials, algo_choices):
    """Aggregate trial-level measures to one row per subject.

    Computes subject means of the base measures (``nfix``, ``blink``) and of
    the per-algorithm measures present in ``trials``, the count of correctly
    answered questions (``n_question_correct``), and the number of trials
    (``ntrial``).
    """
    # Subject means of the base trial measures that are actually present.
    present_base = [col for col in ["nfix", "blink"] if col in trials.columns]
    if present_base:
        subj_agg = trials.groupby("subject")[present_base].mean().round(3).reset_index()
    else:
        subj_agg = trials[["subject"]].drop_duplicates().reset_index(drop=True)

    # Comprehension accuracy: count correct answers per subject, tolerating
    # string-encoded booleans and missing values.
    if "question_correct" in trials.columns:
        qc = trials["question_correct"].copy()
        if qc.dtype == "object":
            # Normalize string/None encodings before the boolean cast.
            qc = qc.replace(
                {
                    "True": True,
                    "true": True,
                    "FALSE": False,
                    "False": False,
                    "false": False,
                    "TRUE": True,
                    "": pd.NA,
                    None: pd.NA,
                }
            )
        try:
            qc_bool = qc.astype("boolean")
        except (TypeError, ValueError):
            # Fall back to element-wise coercion for mixed/odd values.
            qc_bool = qc.apply(lambda v: bool(v) if pd.notna(v) else pd.NA).astype("boolean")
        question_counts = (
            qc_bool.astype("Int64")
            .groupby(trials["subject"])
            .sum(min_count=0)
            .fillna(0)
            .astype("Int64")
            .rename("n_question_correct")
            .reset_index()
        )
        subj_agg = subj_agg.merge(question_counts, on="subject", how="left")
        subj_agg["n_question_correct"] = subj_agg["n_question_correct"].astype("Int64")
    else:
        subj_agg["n_question_correct"] = pd.Series(0, index=subj_agg.index, dtype="Int64")

    # Number of trials per subject.
    subj_agg = subj_agg.merge(
        trials.groupby("subject")["trial_id"].count().reset_index(name="ntrial"), on="subject"
    )

    # Subject means of whichever per-algorithm measures exist in trials.
    for algo in algo_choices:
        candidates = [
            f"saccade_length_{algo}",
            f"reg_{algo}",
            f"mean_fix_duration_{algo}",
            f"total_fix_duration_{algo}",
            f"reading_rate_{algo}",
            f"refix_{algo}",
            f"nrun_{algo}",
            f"skip_{algo}",
        ]
        algo_cols = [c for c in candidates if c in trials.columns]
        if algo_cols:
            per_algo = trials.groupby("subject")[algo_cols].mean().round(3).reset_index()
            subj_agg = pd.merge(subj_agg, per_algo, how="left", on="subject")

    return subj_agg
| |
|