#!/usr/bin/env python3
"""
Train the churn classifier and save the trained model and artifacts.

Running this script reads the processed CSV, fits an XGBoost classifier on
an 80/20 stratified split, writes the pickled model and the ordered list of
feature columns to the artifacts directory, and prints hold-out metrics.
"""
import json
import os

import joblib
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Defaults used when main() is called without arguments.
DATA_PATH = 'data/processed/telco_churn_processed.csv'
ARTIFACTS_DIR = 'artifacts'
TARGET_COLUMN = 'Churn'
# Probability cut-off used only for the printed evaluation metrics; it is
# not written to the artifacts directory.
DECISION_THRESHOLD = 0.35


def _load_features_and_target(data_path):
    """Read the processed CSV and return ``(X, y, feature_columns)``.

    Raises:
        ValueError: if the target column holds anything other than
            ``'No'`` / ``'Yes'``.
    """
    df = pd.read_csv(data_path)

    # Convert target to numeric. Series.map leaves NaN for every value that
    # is missing from the mapping, so check for that explicitly instead of
    # letting NaN labels flow into the split and the model.
    y = df[TARGET_COLUMN].map({'No': 0, 'Yes': 1})
    unmapped = y.isna()
    if unmapped.any():
        unexpected = sorted(df.loc[unmapped, TARGET_COLUMN].astype(str).unique())
        raise ValueError(
            f"Unexpected values in '{TARGET_COLUMN}' column "
            f"(expected 'No'/'Yes'): {unexpected}"
        )
    y = y.astype(int)

    # Every column except the target is a feature; the order is preserved
    # because it is saved alongside the model.
    feature_columns = [col for col in df.columns if col != TARGET_COLUMN]
    return df[feature_columns], y, feature_columns


def _train_model(X_train, y_train):
    """Fit an XGBClassifier weighted by the negative/positive class ratio.

    Raises:
        ValueError: if the training split contains no positive samples.
    """
    negatives = (y_train == 0).sum()
    positives = (y_train == 1).sum()
    if positives == 0:
        raise ValueError(
            "Training data contains no positive samples; "
            "cannot compute scale_pos_weight"
        )
    scale_pos_weight = negatives / positives
    print(f"Class imbalance ratio: {scale_pos_weight:.2f}")

    model = XGBClassifier(
        n_estimators=300,
        learning_rate=0.1,
        max_depth=6,
        random_state=42,
        n_jobs=-1,
        eval_metric="logloss",
        scale_pos_weight=scale_pos_weight,
    )
    print("Training model...")
    model.fit(X_train, y_train)
    return model


def _save_artifacts(model, feature_columns, artifacts_dir):
    """Write the pickled model and the feature-column list to *artifacts_dir*."""
    os.makedirs(artifacts_dir, exist_ok=True)

    model_path = os.path.join(artifacts_dir, 'model.pkl')
    joblib.dump(model, model_path)
    print(f"Model saved to {model_path}")

    feature_columns_path = os.path.join(artifacts_dir, 'feature_columns.json')
    with open(feature_columns_path, 'w', encoding='utf-8') as f:
        json.dump(feature_columns, f)
    print(f"Feature columns saved to {feature_columns_path}")


def _evaluate(model, X_test, y_test, threshold):
    """Return hold-out metrics using *threshold* on the positive-class probability."""
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    y_pred_thresholded = (y_pred_proba >= threshold).astype(int)
    return {
        'accuracy': accuracy_score(y_test, y_pred_thresholded),
        'precision': precision_score(y_test, y_pred_thresholded),
        'recall': recall_score(y_test, y_pred_thresholded),
        'f1': f1_score(y_test, y_pred_thresholded),
        # ROC AUC is threshold-independent, so it uses the raw probabilities.
        'roc_auc': roc_auc_score(y_test, y_pred_proba),
    }


def main(data_path=DATA_PATH, artifacts_dir=ARTIFACTS_DIR,
         threshold=DECISION_THRESHOLD):
    """Train the model, save it with its feature columns, and print metrics.

    Args:
        data_path: CSV file containing the features and a 'Churn' column
            with 'No'/'Yes' values.
        artifacts_dir: directory that receives ``model.pkl`` and
            ``feature_columns.json``; created if missing.
        threshold: probability cut-off applied to the positive class when
            computing accuracy, precision, recall and F1.

    Raises:
        ValueError: if the target column has unexpected values or the
            training split has no positive samples.
    """
    X, y, feature_columns = _load_features_and_target(data_path)

    # Stratify so both splits keep the class ratio of the full dataset.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    model = _train_model(X_train, y_train)
    _save_artifacts(model, feature_columns, artifacts_dir)

    metrics = _evaluate(model, X_test, y_test, threshold)
    print("\nModel Performance:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.3f}")


if __name__ == "__main__":
    main()