Spaces:

logan-codes
/

telco-churn-predictor

Sleeping

App Files Files Community

telco-churn-predictor / scripts /test_inference.py

logan-codes

Add Dockerfile, Gradio app, and core src modules

4ba360f about 1 month ago

raw

history blame contribute delete

5.14 kB

	#!/usr/bin/env python3
	"""
	Test the inference pipeline with sample data
	"""

	import sys
	import os
	import json
	import pandas as pd
	import joblib

	# Append the parent of this script's directory to sys.path.
	# Note: this is the directory that contains the script's folder, not a
	# `src` subdirectory; presumably it is the project root so that project
	# packages resolve when the script is run directly (verify against layout).
	sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

	def load_model_and_artifacts(artifacts_dir="artifacts"):
	    """Load the trained model and the artifacts needed for inference.

	    Args:
	        artifacts_dir: Directory holding ``model.pkl``,
	            ``feature_columns.json`` and ``threshold.json``. Defaults to
	            ``"artifacts"``; a relative value is resolved against the
	            current working directory.

	    Returns:
	        A ``(model, feature_columns, threshold)`` tuple: the object
	        returned by ``joblib.load``, the parsed contents of
	        ``feature_columns.json``, and the value stored under the
	        ``"threshold"`` key of ``threshold.json``.

	    Raises:
	        FileNotFoundError: If one of the artifact files does not exist.
	        KeyError: If ``threshold.json`` has no ``"threshold"`` key.
	    """
	    # NOTE: joblib.load unpickles the file, which can execute arbitrary
	    # code; only point this at model files from a trusted source.
	    model_path = os.path.join(artifacts_dir, "model.pkl")
	    model = joblib.load(model_path)
	    print(f"Model loaded from {model_path}")

	    # Ordered list of column names the model expects.
	    feature_columns_path = os.path.join(artifacts_dir, "feature_columns.json")
	    with open(feature_columns_path, "r", encoding="utf-8") as f:
	        feature_columns = json.load(f)
	    print(f"Feature columns loaded: {len(feature_columns)} features")

	    # Decision threshold applied to the predicted probability.
	    threshold_path = os.path.join(artifacts_dir, "threshold.json")
	    with open(threshold_path, "r", encoding="utf-8") as f:
	        threshold_config = json.load(f)
	    threshold = threshold_config["threshold"]
	    print(f"Classification threshold: {threshold}")

	    return model, feature_columns, threshold

	def transform_input_data(data, feature_columns):
	    """Convert one raw customer record into a single-row model input frame.

	    Binary features are mapped to 0/1, multi-category features are one-hot
	    encoded, and the result is aligned to ``feature_columns``.

	    Args:
	        data: Mapping of raw feature name to value for one customer.
	        feature_columns: Ordered list of column names the model expects.

	    Returns:
	        A one-row ``pandas.DataFrame`` whose columns are exactly
	        ``feature_columns``, in that order. Expected columns that the
	        encoding does not produce are filled with 0; columns not listed in
	        ``feature_columns`` are dropped.
	    """
	    df = pd.DataFrame([data])

	    # Binary mappings (must match training). Any value not listed here
	    # falls back to 0 through the fillna below.
	    binary_map = {
	        "No": 0,
	        "Yes": 1,
	        "Female": 0,
	        "Male": 1,
	        "No phone service": 0,
	        "No internet service": 0,
	    }

	    binary_features = ["gender", "Partner", "Dependents", "PhoneService", "PaperlessBilling"]
	    for feature in binary_features:
	        if feature in df.columns:
	            df[feature] = df[feature].map(binary_map).fillna(0).astype(int)

	    multi_features = [
	        "MultipleLines", "InternetService", "OnlineSecurity",
	        "OnlineBackup", "DeviceProtection", "TechSupport",
	        "StreamingTV", "StreamingMovies", "Contract", "PaymentMethod",
	    ]
	    # Only encode the categorical columns actually supplied, mirroring the
	    # membership check used for the binary features above.
	    present_multi = [col for col in multi_features if col in df.columns]

	    # A single-row frame has exactly one category per column, so
	    # drop_first=True would discard the only indicator and every one-hot
	    # feature would end up as 0. Keep all indicators here instead; assuming
	    # training dropped the first category, those baseline columns are absent
	    # from feature_columns and are removed by the reindex below.
	    if present_multi:
	        df = pd.get_dummies(df, columns=present_multi, dtype=int)

	    # Align to the training layout: add missing columns as 0, drop extras,
	    # and put the columns in the expected order.
	    return df.reindex(columns=feature_columns, fill_value=0)

	def predict_churn(customer_data):
	    """Score a single customer record and summarize the outcome.

	    Loads the model artifacts, transforms ``customer_data`` into the model's
	    input layout, and compares the predicted probability with the stored
	    threshold.

	    Returns:
	        A dict with keys ``churn_probability`` (float), ``churn_prediction``
	        (``"Yes"``/``"No"``), ``threshold_used`` and ``confidence``
	        (``"High"`` when the probability is above 0.7 or below 0.3,
	        otherwise ``"Medium"``).
	    """
	    model, feature_columns, threshold = load_model_and_artifacts()
	    features = transform_input_data(customer_data, feature_columns)

	    # Row 0, column 1 of predict_proba is used as the churn probability.
	    churn_proba = model.predict_proba(features)[0, 1]
	    churn_flag = (churn_proba >= threshold).astype(int)

	    if churn_flag == 1:
	        label = "Yes"
	    else:
	        label = "No"

	    # Probabilities far from the middle of the range are reported as "High".
	    if churn_proba > 0.7 or churn_proba < 0.3:
	        confidence = "High"
	    else:
	        confidence = "Medium"

	    return {
	        "churn_probability": float(churn_proba),
	        "churn_prediction": label,
	        "threshold_used": threshold,
	        "confidence": confidence,
	    }

	def main():
	    """Run the inference pipeline on two hand-written sample customers.

	    Prints each input record and the prediction returned by
	    ``predict_churn`` for it.
	    """
	    print("=== Testing Churn Prediction Inference ===\n")

	    # Profile intended to look like a high churn risk.
	    customer_high_risk = {
	        "gender": "Female",
	        "SeniorCitizen": 0,
	        "Partner": "No",
	        "Dependents": "No",
	        "tenure": 1,
	        "PhoneService": "Yes",
	        "MultipleLines": "No",
	        "InternetService": "Fiber optic",
	        "OnlineSecurity": "No",
	        "OnlineBackup": "No",
	        "DeviceProtection": "No",
	        "TechSupport": "No",
	        "StreamingTV": "No",
	        "StreamingMovies": "No",
	        "Contract": "Month-to-month",
	        "PaperlessBilling": "Yes",
	        "PaymentMethod": "Electronic check",
	        "MonthlyCharges": 75.50,
	        "TotalCharges": 75.50,
	    }

	    # Profile intended to look like a low churn risk.
	    customer_low_risk = {
	        "gender": "Male",
	        "SeniorCitizen": 0,
	        "Partner": "Yes",
	        "Dependents": "Yes",
	        "tenure": 60,
	        "PhoneService": "Yes",
	        "MultipleLines": "Yes",
	        "InternetService": "DSL",
	        "OnlineSecurity": "Yes",
	        "OnlineBackup": "Yes",
	        "DeviceProtection": "Yes",
	        "TechSupport": "Yes",
	        "StreamingTV": "Yes",
	        "StreamingMovies": "Yes",
	        "Contract": "Two year",
	        "PaperlessBilling": "No",
	        "PaymentMethod": "Bank transfer (automatic)",
	        "MonthlyCharges": 95.00,
	        "TotalCharges": 5700.00,
	    }

	    # Score the samples in order, printing the same report for each one.
	    samples = (
	        ("Customer 1 (High Risk Profile):", customer_high_risk),
	        ("Customer 2 (Low Risk Profile):", customer_low_risk),
	    )
	    for heading, customer in samples:
	        print(heading)
	        print(f"Input: {customer}")
	        outcome = predict_churn(customer)
	        print(f"Prediction: {outcome}")
	        print()

	    print("=== Inference Testing Completed Successfully! ===")

	# Run the sample predictions only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()