File size: 5,141 Bytes
4ba360f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env python3
"""
Test the inference pipeline with sample data
"""

import sys
import os
import json
import pandas as pd
import joblib

# Add the parent directory of this script's folder to the import path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

def load_model_and_artifacts():
    """Read the model, the feature column list and the decision threshold.

    All three artifacts are read from the ``artifacts/`` directory, resolved
    relative to the current working directory. A short status line is printed
    after each artifact is loaded.

    Returns:
        A ``(model, feature_columns, threshold)`` tuple: the object unpickled
        by joblib, the parsed contents of ``feature_columns.json`` and the
        value stored under the ``"threshold"`` key of ``threshold.json``.
    """
    model_file = "artifacts/model.pkl"
    columns_file = "artifacts/feature_columns.json"
    threshold_file = "artifacts/threshold.json"

    # Trained estimator
    model = joblib.load(model_file)
    print(f"Model loaded from {model_file}")

    # Column names saved alongside the model
    with open(columns_file, 'r') as fh:
        feature_columns = json.load(fh)
    print(f"Feature columns loaded: {len(feature_columns)} features")

    # Probability cut-off used to turn a score into a Yes/No decision
    with open(threshold_file, 'r') as fh:
        threshold = json.load(fh)["threshold"]
    print(f"Classification threshold: {threshold}")

    return model, feature_columns, threshold

def transform_input_data(data, feature_columns):
    """
    Transform one raw customer record into a single-row model input frame.

    Binary features are mapped to 0/1 and multi-category features are
    one-hot encoded, then the result is aligned to ``feature_columns``.

    Args:
        data: Mapping of raw feature name to value for a single customer.
        feature_columns: Ordered list of column names the model expects.

    Returns:
        A one-row DataFrame whose columns are exactly ``feature_columns``,
        in that order. Expected columns that the input does not produce are
        filled with 0; produced columns not in ``feature_columns`` are
        discarded.
    """
    df = pd.DataFrame([data])

    # Binary mappings (must match training)
    binary_map = {
        "No": 0, "Yes": 1,
        "Female": 0, "Male": 1,
        "No phone service": 0,
        "No internet service": 0,
    }

    # Apply binary encoding; values outside the mapping fall back to 0
    binary_features = ["gender", "Partner", "Dependents", "PhoneService", "PaperlessBilling"]
    for feature in binary_features:
        if feature in df.columns:
            df[feature] = df[feature].map(binary_map).fillna(0).astype(int)

    # One-hot encoding for multi-category features
    multi_features = ["MultipleLines", "InternetService", "OnlineSecurity",
                      "OnlineBackup", "DeviceProtection", "TechSupport",
                      "StreamingTV", "StreamingMovies", "Contract", "PaymentMethod"]
    present = [feature for feature in multi_features if feature in df.columns]

    # Do NOT use drop_first here: with a single row every feature has exactly
    # one category, so drop_first would remove the only indicator column and
    # every one-hot feature would end up as 0. Encode the observed category
    # and let the reindex below discard indicators that are not in
    # feature_columns.
    if present:
        df_encoded = pd.get_dummies(df, columns=present, dtype=int)
    else:
        df_encoded = df

    # Keep exactly the expected columns, in order; missing ones become 0
    df_final = df_encoded.reindex(columns=feature_columns, fill_value=0)

    return df_final

def predict_churn(customer_data):
    """Score a single customer record and summarise the outcome.

    The model and its artifacts are loaded on every call, the record is
    converted to the model's input layout, and the second column of
    ``predict_proba`` for the single row is used as the churn probability.

    Args:
        customer_data: Raw feature mapping for one customer.

    Returns:
        A dict with ``churn_probability`` (float), ``churn_prediction``
        ("Yes" when the probability is at or above the threshold, else
        "No"), ``threshold_used`` and ``confidence`` ("High" when the
        probability is above 0.7 or below 0.3, otherwise "Medium").
    """
    model, feature_columns, threshold = load_model_and_artifacts()

    features = transform_input_data(customer_data, feature_columns)

    # Row 0 is the only row; column 1 is the probability used for churn
    churn_score = model.predict_proba(features)[0, 1]
    is_churn = churn_score >= threshold

    if churn_score > 0.7 or churn_score < 0.3:
        confidence = "High"
    else:
        confidence = "Medium"

    return {
        "churn_probability": float(churn_score),
        "churn_prediction": "Yes" if is_churn else "No",
        "threshold_used": threshold,
        "confidence": confidence,
    }

def main():
    """Run the inference pipeline on two hard-coded sample customers.

    For each sample the input record and the resulting prediction dict are
    printed, framed by a start banner and a completion banner.
    """
    print("=== Testing Churn Prediction Inference ===\n")

    # Sample customer 1: High churn risk
    customer_high_risk = dict(
        gender="Female",
        SeniorCitizen=0,
        Partner="No",
        Dependents="No",
        tenure=1,
        PhoneService="Yes",
        MultipleLines="No",
        InternetService="Fiber optic",
        OnlineSecurity="No",
        OnlineBackup="No",
        DeviceProtection="No",
        TechSupport="No",
        StreamingTV="No",
        StreamingMovies="No",
        Contract="Month-to-month",
        PaperlessBilling="Yes",
        PaymentMethod="Electronic check",
        MonthlyCharges=75.50,
        TotalCharges=75.50,
    )

    # Sample customer 2: Low churn risk
    customer_low_risk = dict(
        gender="Male",
        SeniorCitizen=0,
        Partner="Yes",
        Dependents="Yes",
        tenure=60,
        PhoneService="Yes",
        MultipleLines="Yes",
        InternetService="DSL",
        OnlineSecurity="Yes",
        OnlineBackup="Yes",
        DeviceProtection="Yes",
        TechSupport="Yes",
        StreamingTV="Yes",
        StreamingMovies="Yes",
        Contract="Two year",
        PaperlessBilling="No",
        PaymentMethod="Bank transfer (automatic)",
        MonthlyCharges=95.00,
        TotalCharges=5700.00,
    )

    # Score each sample in turn, echoing the input before the prediction
    samples = (
        ("Customer 1 (High Risk Profile):", customer_high_risk),
        ("Customer 2 (Low Risk Profile):", customer_low_risk),
    )
    for heading, customer in samples:
        print(heading)
        print(f"Input: {customer}")
        outcome = predict_churn(customer)
        print(f"Prediction: {outcome}")
        print()

    print("=== Inference Testing Completed Successfully! ===")

# Run the sample predictions only when this file is executed as a script.
if __name__ == "__main__":
    main()