| import numpy as np | |
| import pandas as pd | |
def temporal_split(df: pd.DataFrame, train_ratio: float = 0.7, val_ratio: float = 0.15):
    """Build chronological train/validation/test masks from ``df["timestamp"]``.

    Two time thresholds are taken as quantiles of the ``timestamp`` column:
    ``t_train`` at ``train_ratio`` and ``t_val`` at ``train_ratio + val_ratio``.
    Rows are then assigned by timestamp value:

    * train: ``timestamp <= t_train``
    * val:   ``t_train < timestamp <= t_val``
    * test:  ``timestamp > t_val``

    The masks share the index of the *input* ``df`` (the frame is neither
    sorted nor re-indexed), so ``df[train_mask]`` selects the intended rows
    regardless of the row order or index labels of ``df``. Rows with equal
    timestamps always land in the same split. Rows whose timestamp is missing
    compare as False against both thresholds and therefore belong to no split.

    Args:
        df: Frame containing a ``timestamp`` column with orderable values.
        train_ratio: Quantile of ``timestamp`` that ends the training period.
        val_ratio: Additional quantile mass assigned to the validation period.

    Returns:
        A tuple ``(train_mask, val_mask, test_mask, t_train)`` where the masks
        are boolean Series aligned with ``df.index`` and ``t_train`` is the
        upper time bound of the training period.

    Raises:
        KeyError: If ``df`` has no ``timestamp`` column.
        ValueError: If a ratio is negative or the two ratios sum to more
            than 1.
    """
    val_quantile = train_ratio + val_ratio
    if train_ratio < 0 or val_ratio < 0 or val_quantile > 1.0 + 1e-9:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got train_ratio={train_ratio!r}, val_ratio={val_ratio!r}"
        )
    # Guard against float round-off pushing the sum marginally above 1.0,
    # which Series.quantile would reject.
    val_quantile = min(val_quantile, 1.0)

    timestamps = df["timestamp"]

    # Time thresholds (CRITICAL): splitting on time values rather than row
    # counts keeps every training row at or before t_train, so no later
    # observation leaks into training. Quantiles do not depend on row order,
    # so no sort is needed and the masks stay aligned with the caller's df.
    t_train = timestamps.quantile(train_ratio)
    t_val = timestamps.quantile(val_quantile)

    train_mask = timestamps <= t_train
    val_mask = (timestamps > t_train) & (timestamps <= t_val)
    test_mask = timestamps > t_val
    return train_mask, val_mask, test_mask, t_train