Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Train the Telco churn XGBoost model and save it together with its artifacts | |
| """ | |
| import joblib | |
| import json | |
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| from xgboost import XGBClassifier | |
def main(
    data_path='data/processed/telco_churn_processed.csv',
    artifacts_dir='artifacts',
    threshold=0.35,
):
    """Train the churn classifier, save it, and print hold-out metrics.

    Steps, in order: load the processed CSV, encode the ``Churn`` target as
    0/1, make a stratified 80/20 train/test split, fit an ``XGBClassifier``
    weighted by the negative/positive ratio of the training labels, write
    the model and the feature-column list to ``artifacts_dir``, then print
    metrics computed on the test split.

    Args:
        data_path: CSV file to read. It must contain a ``Churn`` column
            whose values are the strings ``'No'`` and ``'Yes'``; every
            other column is used as a feature.
        artifacts_dir: Directory that receives ``model.pkl`` and
            ``feature_columns.json``. Created if it does not exist.
        threshold: Probability cut-off applied to the positive-class
            probability when computing accuracy, precision, recall and F1.
            ROC AUC is computed from the raw probabilities.

    Raises:
        ValueError: If ``Churn`` contains a value other than ``'No'`` or
            ``'Yes'``, or if the training split lacks one of the two classes.
    """
    # Function-scope imports: the original imported these mid-function; they
    # are hoisted here so every dependency of the block is visible up front.
    import os
    from sklearn.metrics import (
        accuracy_score,
        f1_score,
        precision_score,
        recall_score,
        roc_auc_score,
    )

    # Load the processed data
    df = pd.read_csv(data_path)

    # Convert target to numeric. Series.map turns every unmapped label into
    # NaN, so fail loudly here instead of letting NaN reach the split/model.
    churn = df['Churn'].map({'No': 0, 'Yes': 1})
    unmapped = churn.isna()
    if unmapped.any():
        unexpected = df.loc[unmapped, 'Churn'].unique().tolist()
        raise ValueError(
            f"Unexpected values in 'Churn' column (expected 'No'/'Yes'): {unexpected}"
        )
    df['Churn'] = churn

    # Separate features and target
    feature_columns = [col for col in df.columns if col != 'Churn']
    X = df[feature_columns]
    y = df['Churn']

    # Stratify so both splits keep the same churn rate as the full data set.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Weight the positive class by the negative/positive ratio.
    negatives = (y_train == 0).sum()
    positives = (y_train == 1).sum()
    if negatives == 0 or positives == 0:
        raise ValueError(
            "Training data must contain both churn classes "
            f"(found {negatives} negative and {positives} positive rows)"
        )
    scale_pos_weight = negatives / positives
    print(f"Class imbalance ratio: {scale_pos_weight:.2f}")

    model = XGBClassifier(
        n_estimators=300,
        learning_rate=0.1,
        max_depth=6,
        random_state=42,
        n_jobs=-1,
        eval_metric="logloss",
        scale_pos_weight=scale_pos_weight,
    )
    print("Training model...")
    model.fit(X_train, y_train)

    # Save model
    os.makedirs(artifacts_dir, exist_ok=True)
    model_path = os.path.join(artifacts_dir, 'model.pkl')
    joblib.dump(model, model_path)
    print(f"Model saved to {model_path}")

    # Save feature columns, in training order, next to the model.
    feature_columns_path = os.path.join(artifacts_dir, 'feature_columns.json')
    with open(feature_columns_path, 'w', encoding='utf-8') as f:
        json.dump(feature_columns, f)
    print(f"Feature columns saved to {feature_columns_path}")

    # Test the model. Only the positive-class probabilities are needed: the
    # hard labels come from the custom threshold, not from model.predict().
    # NOTE: the threshold is used for these metrics only; it is not written
    # to the artifacts directory.
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    y_pred_thresholded = (y_pred_proba >= threshold).astype(int)

    metrics = {
        'accuracy': accuracy_score(y_test, y_pred_thresholded),
        'precision': precision_score(y_test, y_pred_thresholded),
        'recall': recall_score(y_test, y_pred_thresholded),
        'f1': f1_score(y_test, y_pred_thresholded),
        'roc_auc': roc_auc_score(y_test, y_pred_proba),
    }

    print("\nModel Performance:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.3f}")
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()