Spaces:

logan-codes
/

telco-churn-predictor

Sleeping

App Files Files Community

telco-churn-predictor / scripts /save_model.py

logan-codes

Add Dockerfile, Gradio app, and core src modules

4ba360f about 1 month ago

raw

history blame contribute delete

2.43 kB

	#!/usr/bin/env python3
	"""
	Train the churn XGBoost classifier, save the model and its feature-column
	list as artifacts, and print hold-out evaluation metrics.
	"""

	import joblib
	import json
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from xgboost import XGBClassifier

	def main(
	    data_path='data/processed/telco_churn_processed.csv',
	    artifacts_dir='artifacts',
	    threshold=0.35,
	):
	    """Train the churn classifier, save it, and print hold-out metrics.

	    Args:
	        data_path: CSV file containing a ``Churn`` target column; every
	            other column is used as a model feature.
	        artifacts_dir: Directory (created if missing) that receives
	            ``model.pkl`` and ``feature_columns.json``.
	        threshold: Probability cut-off applied to the predicted positive
	            class probability when computing accuracy, precision, recall
	            and F1. It only affects the printed metrics; it is not saved.

	    Raises:
	        ValueError: If the ``Churn`` column has missing values, or holds
	            non-numeric labels other than ``'No'`` / ``'Yes'``.
	    """
	    # Function-scope imports, as in the original script, grouped up front.
	    import os

	    from sklearn.metrics import (
	        accuracy_score,
	        f1_score,
	        precision_score,
	        recall_score,
	        roc_auc_score,
	    )

	    # Load the processed data
	    df = pd.read_csv(data_path)

	    # Convert target to numeric. Skip the mapping when the column is already
	    # numeric: Series.map would otherwise turn every value into NaN.
	    if not pd.api.types.is_numeric_dtype(df['Churn']):
	        df['Churn'] = df['Churn'].map({'No': 0, 'Yes': 1})
	    if df['Churn'].isna().any():
	        raise ValueError(
	            "Column 'Churn' contains missing or unexpected labels; "
	            "expected 'No'/'Yes' or numeric 0/1."
	        )

	    # Separate features and target
	    feature_columns = [col for col in df.columns if col != 'Churn']
	    X = df[feature_columns]
	    y = df['Churn']

	    # Stratified split so both partitions keep the same churn rate.
	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=0.2, random_state=42, stratify=y
	    )

	    # Ratio of negatives to positives in the training split, passed to
	    # XGBoost as the positive-class weight.
	    scale_pos_weight = (y_train == 0).sum() / (y_train == 1).sum()
	    print(f"Class imbalance ratio: {scale_pos_weight:.2f}")

	    model = XGBClassifier(
	        n_estimators=300,
	        learning_rate=0.1,
	        max_depth=6,
	        random_state=42,
	        n_jobs=-1,
	        eval_metric="logloss",
	        scale_pos_weight=scale_pos_weight,
	    )

	    print("Training model...")
	    model.fit(X_train, y_train)

	    # Save model
	    os.makedirs(artifacts_dir, exist_ok=True)

	    model_path = os.path.join(artifacts_dir, 'model.pkl')
	    joblib.dump(model, model_path)
	    print(f"Model saved to {model_path}")

	    # Save feature columns (names and order used at training time).
	    feature_columns_path = os.path.join(artifacts_dir, 'feature_columns.json')
	    with open(feature_columns_path, 'w', encoding='utf-8') as f:
	        json.dump(feature_columns, f)
	    print(f"Feature columns saved to {feature_columns_path}")

	    # Evaluate on the hold-out split. Labels come from the custom threshold
	    # rather than model.predict(); ROC AUC uses the raw probabilities.
	    y_pred_proba = model.predict_proba(X_test)[:, 1]
	    y_pred_thresholded = (y_pred_proba >= threshold).astype(int)

	    metrics = {
	        'accuracy': accuracy_score(y_test, y_pred_thresholded),
	        'precision': precision_score(y_test, y_pred_thresholded),
	        'recall': recall_score(y_test, y_pred_thresholded),
	        'f1': f1_score(y_test, y_pred_thresholded),
	        'roc_auc': roc_auc_score(y_test, y_pred_proba),
	    }

	    print("\nModel Performance:")
	    for metric, value in metrics.items():
	        print(f"{metric}: {value:.3f}")

	# Script entry point: run the train-and-save pipeline only when this file
	# is executed directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()