# File size: 529 Bytes (revision a3682cf)
import numpy as np
import pandas as pd
def temporal_split(df: pd.DataFrame, train_ratio: float = 0.7, val_ratio: float = 0.15):
    """Build train/validation/test boolean masks from timestamp quantiles.

    The cut points are quantiles of ``df["timestamp"]``; rows are assigned by
    comparing their timestamp against those cut points:

    * train: ``timestamp <= t_train``
    * val:   ``t_train < timestamp <= t_val``
    * test:  ``timestamp > t_val``

    The returned masks share ``df``'s index and row order, so they can be
    applied directly to the frame that was passed in (``df[train_mask]``),
    whether or not it is sorted by time. ``df`` itself is not modified.

    Args:
        df: Frame containing a ``"timestamp"`` column.
        train_ratio: Quantile of the timestamps used as the train cut-off.
        val_ratio: Additional quantile mass assigned to validation; the
            validation cut-off is the ``train_ratio + val_ratio`` quantile.

    Returns:
        A tuple ``(train_mask, val_mask, test_mask, t_train)`` where the three
        masks are boolean Series aligned to ``df.index`` and ``t_train`` is
        the train cut-off value.
    """
    timestamps = df["timestamp"]

    # Thresholds are time values, not row counts: rows with equal timestamps
    # always land in the same split, so no instant straddles a boundary.
    # Neither the quantiles nor the comparisons depend on row order, so the
    # frame is deliberately not sorted or re-indexed here -- doing so would
    # return masks that no longer line up with the caller's rows.
    t_train = timestamps.quantile(train_ratio)
    t_val = timestamps.quantile(train_ratio + val_ratio)

    train_mask = timestamps <= t_train
    val_mask = (timestamps > t_train) & (timestamps <= t_val)
    test_mask = timestamps > t_val
    return train_mask, val_mask, test_mask, t_train