Spaces:
Sleeping
Sleeping
| import os, sys | |
| import pandas as pd | |
| # make src importable | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) | |
| from src.data_scripts.preprocess import preprocess_data | |
| from src.features.build_features import build_features | |
# Input and output CSV locations.
# The defaults are the original hard-coded local Windows paths. Set the
# CHURN_RAW_PATH / CHURN_OUT_PATH environment variables to run the script on
# another machine without editing this file. An unset or empty variable falls
# back to the default. OUT should include a directory component, because the
# save step creates its parent directory before writing.
RAW = os.environ.get("CHURN_RAW_PATH") or (
    r"E:\Coding\AIMl\Churn\data\raw\WA_Fn-UseC_-Telco-Customer-Churn.csv"
)
OUT = os.environ.get("CHURN_OUT_PATH") or (
    r"E:\Coding\AIMl\Churn\data\processed\WA_Fn-UseC_-Telco-Customer-Churn.csv"
)
# Steps 1-2: read the raw CSV and pass it straight to the project's
# preprocessing step, with "Churn" as the target column.
# NOTE(review): the original comment says preprocessing drops the id column
# and fixes TotalCharges; that lives in src.data_scripts.preprocess, so
# confirm there.
df = preprocess_data(pd.read_csv(RAW), target_col="Churn")
# Step 3: normalize the target column to integer 0/1.
# Values are compared as stripped, lower-cased strings. Besides the raw
# "Yes"/"No" labels, the table also accepts a column that is already encoded
# as 0/1 (int, float or bool). Without those entries an already-encoded
# column would map entirely to NaN and abort the script.
# NOTE(review): whether preprocess_data encodes the target itself is not
# visible here; this step is written to work either way.
_CHURN_CODES = {
    "no": 0,
    "yes": 1,
    "0": 0,
    "1": 1,
    "0.0": 0,
    "1.0": 1,
    "false": 0,
    "true": 1,
}
_churn_text = df["Churn"].astype(str).str.strip().str.lower()
_churn_encoded = _churn_text.map(_CHURN_CODES)

# Sanity check. This raises explicitly instead of using `assert`, because
# asserts are removed when Python runs with -O. Missing values in the input
# become the string "nan" above and are reported here as well.
_unmapped = _churn_encoded.isna()
if _unmapped.any():
    _bad_values = sorted(set(_churn_text[_unmapped]))
    raise ValueError(
        f"Churn not 0/1 after preprocess; unrecognized values: {_bad_values}"
    )

# Every value came from _CHURN_CODES, so the column is exactly {0, 1};
# the cast only pins the integer dtype.
df["Churn"] = _churn_encoded.astype("int64")
# Step 4: derive the feature table from the cleaned frame.
df_processed = build_features(df, target_col="Churn")

# Step 5: make sure the destination folder exists, then write the CSV
# without the index column and report where it was written and its shape.
_out_dir, _out_name = os.path.split(OUT)
os.makedirs(_out_dir, exist_ok=True)
df_processed.to_csv(OUT, index=False)
print(f"✅ Processed dataset saved to {OUT} | Shape: {df_processed.shape}")