Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import joblib | |
| from sklearn.preprocessing import MinMaxScaler | |
| from imblearn.combine import SMOTETomek | |
| from sklearn.linear_model import LogisticRegression | |
| from src.utils import load_config, get_versioned_path | |
| from src.ingestion import load_raw_data | |
| from src.preprocessing import clean_and_engineer | |
def train_pipeline():
    """Train the credit model end to end and persist its artifacts.

    Steps, in order:
      1. Load the configuration and the raw data, then run
         ``clean_and_engineer`` on the data.
      2. Split off the target column named by ``config["data"]["target"]``.
      3. One-hot encode the features (``drop_first=True``) and min-max
         scale them.
      4. Resample the scaled data with ``SMOTETomek``.
      5. Fit a ``LogisticRegression`` built from ``config["model"]["params"]``.
      6. Dump the model, the fitted scaler and the list of encoded column
         names with ``joblib`` under ``config["artifacts"]["model_dir"]``.

    Returns:
        A ``(model_path, scaler_path, columns_path)`` tuple holding the
        paths produced by ``get_versioned_path`` for the three artifacts.
    """
    cfg = load_config()
    frame = clean_and_engineer(load_raw_data())

    # Separate the features from the label column.
    label_name = cfg["data"]["target"]
    raw_features = frame.drop(columns=[label_name])
    labels = frame[label_name]

    # Encode categoricals, then scale; the fitted scaler is saved below.
    encoded = pd.get_dummies(raw_features, drop_first=True)
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(encoded)

    # Resample the scaled features and labels before fitting.
    resampler = SMOTETomek(random_state=cfg["training"]["random_state"])
    balanced_features, balanced_labels = resampler.fit_resample(scaled, labels)

    hyperparams = cfg["model"]["params"]
    model = LogisticRegression(**hyperparams).fit(
        balanced_features, balanced_labels
    )

    # Resolve the three artifact paths in the same order as they are written.
    model_path = get_versioned_path(
        cfg["artifacts"]["model_dir"], "credit_model", "pkl"
    )
    scaler_path = get_versioned_path(
        cfg["artifacts"]["model_dir"], "scaler", "pkl"
    )
    columns_path = get_versioned_path(
        cfg["artifacts"]["model_dir"], "columns", "pkl"
    )

    # The encoded column names are saved next to the model and scaler.
    to_persist = (
        (model, model_path),
        (scaler, scaler_path),
        (list(encoded.columns), columns_path),
    )
    for artifact, destination in to_persist:
        joblib.dump(artifact, destination)

    print(f"Model saved at: {model_path}")
    print(f"Scaler saved at: {scaler_path}")
    print(f"Columns saved at: {columns_path}")

    return model_path, scaler_path, columns_path
# Script entry point: run the training pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    train_pipeline()