#!/usr/bin/env python3
# Snapshot metadata: revision 4ba360f, file size 2,425 bytes.
"""
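Train the XGBoost churn model, save the model and its feature-column list
as artifacts, and print evaluation metrics on a held-out test split.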
"""
import joblib
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
def main():
    """Train the churn classifier, persist it, and report hold-out metrics.

    Steps:
      1. Load ``data/processed/telco_churn_processed.csv`` and map the
         ``Churn`` column from ``'No'``/``'Yes'`` to ``0``/``1``.
      2. Split 80/20 (stratified on the target, ``random_state=42``) and fit
         an ``XGBClassifier`` whose ``scale_pos_weight`` is the ratio of
         negative to positive labels in the training split.
      3. Write ``artifacts/model.pkl`` and ``artifacts/feature_columns.json``.
      4. Print accuracy, precision, recall and F1 at a 0.35 probability
         threshold, plus ROC AUC, computed on the test split.

    Raises:
        ValueError: If any ``Churn`` value is not ``'No'`` or ``'Yes'``
            (missing values included), or if the training split contains no
            positive samples.
    """
    # Function-scope imports keep this block independent of the module's
    # import header; they are grouped here instead of mid-function.
    import os

    from sklearn.metrics import (
        accuracy_score,
        f1_score,
        precision_score,
        recall_score,
        roc_auc_score,
    )

    # Load the processed data
    df = pd.read_csv('data/processed/telco_churn_processed.csv')

    # Convert target to numeric. Series.map turns every label missing from
    # the dict into NaN, so fail here with a clear message instead of letting
    # NaN labels surface later as an obscure split/fit error.
    raw_churn = df['Churn']
    df['Churn'] = raw_churn.map({'No': 0, 'Yes': 1})
    unmapped = df['Churn'].isna()
    if unmapped.any():
        bad_labels = sorted(raw_churn[unmapped].astype(str).unique())
        raise ValueError(
            "Unexpected values in 'Churn' column "
            f"(expected 'No'/'Yes'): {bad_labels}"
        )

    # Separate features and target
    feature_columns = [col for col in df.columns if col != 'Churn']
    X = df[feature_columns]
    y = df['Churn']

    # Hold out 20% for evaluation; stratify keeps the churn rate equal in
    # both splits.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Weight the positive class by the negative/positive ratio.
    n_negative = (y_train == 0).sum()
    n_positive = (y_train == 1).sum()
    if n_positive == 0:
        raise ValueError(
            "Training split contains no positive ('Yes') samples; "
            "cannot compute scale_pos_weight"
        )
    scale_pos_weight = n_negative / n_positive
    print(f"Class imbalance ratio: {scale_pos_weight:.2f}")

    model = XGBClassifier(
        n_estimators=300,
        learning_rate=0.1,
        max_depth=6,
        random_state=42,
        n_jobs=-1,
        eval_metric="logloss",
        scale_pos_weight=scale_pos_weight,
    )

    print("Training model...")
    model.fit(X_train, y_train)

    # Save model
    artifacts_dir = 'artifacts'
    os.makedirs(artifacts_dir, exist_ok=True)
    model_path = os.path.join(artifacts_dir, 'model.pkl')
    joblib.dump(model, model_path)
    print(f"Model saved to {model_path}")

    # Save feature columns (names and order as used for training)
    feature_columns_path = os.path.join(artifacts_dir, 'feature_columns.json')
    with open(feature_columns_path, 'w', encoding='utf-8') as f:
        json.dump(feature_columns, f)
    print(f"Feature columns saved to {feature_columns_path}")

    # Test the model. Class predictions use a custom probability threshold
    # rather than model.predict(); ROC AUC is threshold-free and is computed
    # from the raw probabilities.
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    threshold = 0.35
    y_pred_thresholded = (y_pred_proba >= threshold).astype(int)

    metrics = {
        'accuracy': accuracy_score(y_test, y_pred_thresholded),
        'precision': precision_score(y_test, y_pred_thresholded),
        'recall': recall_score(y_test, y_pred_thresholded),
        'f1': f1_score(y_test, y_pred_thresholded),
        'roc_auc': roc_auc_score(y_test, y_pred_proba),
    }

    print("\nModel Performance:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.3f}")
# Run the training pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|