Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.compose import ColumnTransformer | |
def prepare_data(df: pd.DataFrame):
    """Split a frame into train/test sets and build an unfitted preprocessor.

    The ``AQI`` column is the prediction target. ``AQI_Bucket`` and ``Date``
    are removed when present (leakage / unnecessary columns); every other
    column is treated as a feature.

    Args:
        df: Input data. Must contain an ``AQI`` column. The caller's frame is
            not modified; all operations here return new objects.

    Returns:
        A 5-tuple ``(X_train, X_test, y_train, y_test, preprocessor)`` where
        the first four come from an 80/20 ``train_test_split`` with
        ``random_state=42`` and ``preprocessor`` is a ``ColumnTransformer``
        that has been constructed but NOT fitted.

    Raises:
        KeyError: If ``df`` has no ``AQI`` column.
    """
    if "AQI" not in df.columns:
        raise KeyError("prepare_data requires an 'AQI' target column")

    # Drop leakage / unnecessary columns (ignored when absent).
    df = df.drop(columns=["AQI_Bucket", "Date"], errors="ignore")

    # Rows with no target value cannot be used for supervised training or
    # evaluation; leaving them in would put NaN into y_train / y_test.
    df = df.dropna(subset=["AQI"])

    # Split features & target.
    X = df.drop(columns=["AQI"])
    y = df["AQI"]

    # Train-test split (fixed seed so the partition is reproducible).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Column selection. "category" is included alongside "object" so that
    # pandas categorical columns are encoded rather than left out of both
    # transformers. Columns of any other dtype are not listed in either
    # transformer (NOTE(review): with ColumnTransformer's default remainder
    # setting those are dropped from the output - confirm that is intended).
    num_cols = X.select_dtypes(include=["number"]).columns
    cat_cols = X.select_dtypes(include=["object", "category"]).columns

    # Preprocessing pipeline: scale numeric features, one-hot encode
    # categorical ones; categories unseen at fit time are ignored at
    # transform time instead of raising.
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ]
    )

    return X_train, X_test, y_train, y_test, preprocessor