File size: 584 Bytes
9f0dbb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
"""Chargement & split des données traitées."""
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split


def load_processed(path: str | Path = "data/processed/df_central_encode.csv") -> pd.DataFrame:
    return pd.read_csv(Path(path))


def split_xy(
    df: pd.DataFrame,
    target: str = "attrition",
    test_size: float = 0.2,
    seed: int = 42,
):
    """Separate features from the target column and split into train/test sets.

    Args:
        df: Frame holding both the feature columns and the ``target`` column.
        target: Name of the label column; it is removed from the features.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        seed: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        Whatever ``train_test_split(X, y, ...)`` returns for the feature frame
        and the target series (scikit-learn documents this as the train/test
        split of each input, in order).

    Raises:
        KeyError: If ``target`` is not a column of ``df``.
    """
    # Drop first, then select: keeps the same failure point when the target
    # column is missing.
    features = df.drop(columns=[target])
    labels = df[target]

    # Stratify on the labels so the split is done with respect to the
    # target's class distribution.
    split_options = {
        "test_size": test_size,
        "stratify": labels,
        "random_state": seed,
    }
    return train_test_split(features, labels, **split_options)