""" Train / Validation Split Module ================================== Split datasets with configurable ratio, seed, and shuffle. """ from dataclasses import dataclass from typing import Tuple import pandas as pd @dataclass class SplitConfig: """Configuration for train/validation split.""" enabled: bool = True train_ratio: float = 0.8 # e.g., 0.8 means 80% train, 20% val random_seed: int = 42 shuffle: bool = True def split_dataset( df: pd.DataFrame, config: SplitConfig, ) -> Tuple[pd.DataFrame, pd.DataFrame]: """ Split DataFrame into train and validation sets. Returns: (train_df, val_df) tuple """ if not config.enabled: return df, pd.DataFrame(columns=df.columns) if config.shuffle: df = df.sample(frac=1, random_state=config.random_seed).reset_index(drop=True) split_idx = int(len(df) * config.train_ratio) train_df = df.iloc[:split_idx].reset_index(drop=True) val_df = df.iloc[split_idx:].reset_index(drop=True) return train_df, val_df