#!/usr/bin/env python3
"""
Test the inference pipeline with sample data
"""
import sys
import os
import json
import pandas as pd
import joblib
# Add the parent of this script's directory to the import path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
def load_model_and_artifacts():
    """Load the model, feature-column list and decision threshold.

    All three files are read from the ``artifacts/`` directory using
    relative paths, so they resolve against the current working directory.
    A progress line is printed after each artifact is loaded.

    Returns:
        tuple: ``(model, feature_columns, threshold)`` where ``model`` is
        whatever ``joblib.load`` returns, ``feature_columns`` is the parsed
        content of ``feature_columns.json`` and ``threshold`` is the value
        stored under the ``"threshold"`` key of ``threshold.json``.
    """
    def _read_json(path):
        # Small local helper: parse one JSON file and close it promptly.
        with open(path, 'r') as handle:
            return json.load(handle)

    # Serialized estimator
    model_file = "artifacts/model.pkl"
    estimator = joblib.load(model_file)
    print(f"Model loaded from {model_file}")

    # Column names the model expects
    columns = _read_json("artifacts/feature_columns.json")
    print(f"Feature columns loaded: {len(columns)} features")

    # Decision threshold applied to the predicted probability
    cutoff = _read_json("artifacts/threshold.json")["threshold"]
    print(f"Classification threshold: {cutoff}")

    return estimator, columns, cutoff
def transform_input_data(data, feature_columns):
    """
    Transform one raw customer record into the column layout the model expects.

    Args:
        data: Mapping of raw feature name to value for a single customer.
        feature_columns: Ordered list of column names to produce.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are exactly
        ``feature_columns``, in that order. Columns that the encoding does
        not produce are filled with 0; columns not listed are dropped.
    """
    df = pd.DataFrame([data])

    # Binary mappings (expected to match the encoding used at training time).
    # Values not present in this map are encoded as 0 by the fillna below.
    binary_map = {
        "No": 0, "Yes": 1,
        "Female": 0, "Male": 1,
        "No phone service": 0,
        "No internet service": 0,
    }

    # Apply binary encoding to whichever binary features are present
    binary_features = ["gender", "Partner", "Dependents", "PhoneService", "PaperlessBilling"]
    for feature in binary_features:
        if feature in df.columns:
            df[feature] = df[feature].map(binary_map).fillna(0).astype(int)

    # One-hot encoding for multi-category features
    multi_features = ["MultipleLines", "InternetService", "OnlineSecurity",
                      "OnlineBackup", "DeviceProtection", "TechSupport",
                      "StreamingTV", "StreamingMovies", "Contract", "PaymentMethod"]
    # Only encode keys that were actually supplied; get_dummies raises
    # KeyError when asked to encode a column that does not exist.
    present = [name for name in multi_features if name in df.columns]

    if present:
        # drop_first must be False here: a single-row frame has exactly one
        # category per column, so drop_first=True would discard every dummy
        # column and the record would always look like the baseline category.
        # Any dummy column that is not part of feature_columns is removed by
        # the reindex below.
        df = pd.get_dummies(df, columns=present, drop_first=False)

    # Align to the expected layout: add missing columns as 0, drop extras,
    # and enforce the expected column order.
    return df.reindex(columns=feature_columns, fill_value=0)
def predict_churn(customer_data):
    """Score a single customer and summarise the outcome.

    Loads the model artifacts, converts ``customer_data`` into the model's
    feature layout, and thresholds the predicted probability.

    Args:
        customer_data: Mapping of raw feature name to value for one customer.

    Returns:
        dict with keys ``churn_probability`` (float), ``churn_prediction``
        (``"Yes"``/``"No"``), ``threshold_used`` and ``confidence``
        (``"High"``/``"Medium"``).
    """
    # Artifacts are reloaded on every call.
    model, feature_columns, threshold = load_model_and_artifacts()

    features = transform_input_data(customer_data, feature_columns)

    # First row, second column of predict_proba
    # (presumably the positive/churn class - confirm against the model's classes_).
    probability = model.predict_proba(features)[0, 1]
    flagged = (probability >= threshold).astype(int)

    if flagged == 1:
        label = "Yes"
    else:
        label = "No"

    # Probabilities far from 0.5 on either side are reported as high confidence.
    if probability > 0.7 or probability < 0.3:
        certainty = "High"
    else:
        certainty = "Medium"

    return {
        "churn_probability": float(probability),
        "churn_prediction": label,
        "threshold_used": threshold,
        "confidence": certainty,
    }
def main():
    """Run the inference pipeline on two hand-written sample customers.

    Prints each input record and the dict returned by ``predict_churn``.
    """
    print("=== Testing Churn Prediction Inference ===\n")

    # Profile intended to look like a high churn risk
    high_risk_profile = {
        "gender": "Female",
        "SeniorCitizen": 0,
        "Partner": "No",
        "Dependents": "No",
        "tenure": 1,
        "PhoneService": "Yes",
        "MultipleLines": "No",
        "InternetService": "Fiber optic",
        "OnlineSecurity": "No",
        "OnlineBackup": "No",
        "DeviceProtection": "No",
        "TechSupport": "No",
        "StreamingTV": "No",
        "StreamingMovies": "No",
        "Contract": "Month-to-month",
        "PaperlessBilling": "Yes",
        "PaymentMethod": "Electronic check",
        "MonthlyCharges": 75.50,
        "TotalCharges": 75.50,
    }

    # Profile intended to look like a low churn risk
    low_risk_profile = {
        "gender": "Male",
        "SeniorCitizen": 0,
        "Partner": "Yes",
        "Dependents": "Yes",
        "tenure": 60,
        "PhoneService": "Yes",
        "MultipleLines": "Yes",
        "InternetService": "DSL",
        "OnlineSecurity": "Yes",
        "OnlineBackup": "Yes",
        "DeviceProtection": "Yes",
        "TechSupport": "Yes",
        "StreamingTV": "Yes",
        "StreamingMovies": "Yes",
        "Contract": "Two year",
        "PaperlessBilling": "No",
        "PaymentMethod": "Bank transfer (automatic)",
        "MonthlyCharges": 95.00,
        "TotalCharges": 5700.00,
    }

    # Score each sample in turn: heading, input, prediction, blank line.
    samples = (
        ("Customer 1 (High Risk Profile):", high_risk_profile),
        ("Customer 2 (Low Risk Profile):", low_risk_profile),
    )
    for heading, customer in samples:
        print(heading)
        print(f"Input: {customer}")
        outcome = predict_churn(customer)
        print(f"Prediction: {outcome}")
        print()

    print("=== Inference Testing Completed Successfully! ===")
# Run the sample-customer check only when this file is executed as a script.
if __name__ == "__main__":
    main()
|