File size: 4,357 Bytes
ad55197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import joblib
import os
import pandas as pd
import numpy as np

# Module-level model handles. All start as None and are assigned by
# load_models(); the predict_* functions check them for None before use.
kmeans_model = clv_model = scaler = None

def _load_artifact(path, loaded_label, error_label):
    """Load one joblib artifact; return None if it is missing or unreadable.

    Args:
        path: Full path of the ``.pkl`` file to load.
        loaded_label: Text printed before " loaded" on success.
        error_label: Text printed after "Could not load " on failure.

    Returns:
        The deserialized object, or None when the file does not exist or
        loading raised an exception.
    """
    try:
        if not os.path.exists(path):
            print(f"File not found: {path}")
            return None
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only point this at trusted model files.
        artifact = joblib.load(path)
    except Exception as e:
        # Deliberately broad: loading is best-effort, and unpickling can
        # raise almost anything (version mismatch, truncated file, ...).
        print(f"Could not load {error_label}: {e}")
        return None

    print(f"{loaded_label} loaded")
    return artifact


def load_models(models_path=None):
    """Load all trained ML models into the module-level globals.

    Each artifact is loaded independently; a missing or unreadable file
    sets the corresponding global to None and does not stop the others
    from loading.

    Args:
        models_path: Directory containing ``kmeans_model.pkl``,
            ``clv_model.pkl`` and ``scaler.pkl``. When None (the default),
            the ``models`` directory next to this file's parent directory
            is used.

    Returns:
        True if at least one of the K-Means or CLV models was loaded,
        otherwise False. The scaler does not influence the return value,
        although ``predict_segment`` also requires it. If the models
        directory does not exist, False is returned and the globals are
        left as they were.
    """
    global kmeans_model, clv_model, scaler

    if models_path is None:
        current_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(current_dir)
        models_path = os.path.join(project_root, "models")

    print(f"Looking for models in: {models_path}")

    if not os.path.exists(models_path):
        print(f"Models folder not found at: {models_path}")
        return False

    # K-Means model for customer segmentation
    kmeans_model = _load_artifact(
        os.path.join(models_path, "kmeans_model.pkl"),
        "K-Means model",
        "K-Means model",
    )

    # CLV prediction model
    clv_model = _load_artifact(
        os.path.join(models_path, "clv_model.pkl"),
        "CLV model",
        "CLV model",
    )

    # Feature scaler
    scaler = _load_artifact(
        os.path.join(models_path, "scaler.pkl"),
        "Scaler",
        "scaler",
    )

    return kmeans_model is not None or clv_model is not None

def predict_segment(recency, frequency, monetary):
    """Assign a customer to a K-Means cluster and name the segment.

    The three RFM values are placed in a one-row DataFrame, passed through
    the module-level ``scaler`` and then through ``kmeans_model``.

    Returns:
        ``{"cluster": <int>, "segment": <str>}`` on success, where the
        segment is "Unknown" for a cluster id without a label, or
        ``{"error": "Models not loaded"}`` when either the K-Means model
        or the scaler is missing.
    """
    models_ready = kmeans_model is not None and scaler is not None
    if not models_ready:
        return {"error": "Models not loaded"}

    # Cluster id -> human-readable label (position in the tuple is the id).
    # NOTE(review): the descriptions are carried over from the original
    # notes; cluster 1 is labelled VIP but described as lost customers.
    # Confirm the mapping against the training notebook.
    segment_names = dict(enumerate((
        "At-Risk Customers",  # 2,396 customers - high recency, low frequency
        "VIP Customers",      # 1,024 customers - very high recency (lost customers)
        "Loyal Regulars",     # 145 customers - low recency, high frequency
        "New/Occasional",     # 723 customers - medium recency, medium frequency
    )))

    # One-row frame; column order is Recency, Frequency, Monetary.
    rfm_row = {
        'Recency': [recency],
        'Frequency': [frequency],
        'Monetary': [monetary],
    }
    rfm_frame = pd.DataFrame(rfm_row)

    # Scale first, then take the label predicted for the single row.
    label = kmeans_model.predict(scaler.transform(rfm_frame))[0]

    result = {"cluster": int(label)}
    result["segment"] = segment_names.get(label, "Unknown")
    return result

def predict_clv(features_dict):
    """Predict Customer Lifetime Value and bucket it into a value category.

    Args:
        features_dict: Mapping of feature name to value. The model is fed
            nine features in this order: frequency, recency, avg_quantity,
            avg_unit_price, avg_transaction, lifespan_days,
            avg_days_between_purchases, purchases_per_month,
            total_quantity. Any feature missing from the mapping is
            silently treated as 0.

    Returns:
        ``{"predicted_clv": <float>, "value_category": <str>}`` on success,
        with the prediction rounded to two decimals and returned as a
        built-in float, or ``{"error": "CLV model not loaded"}`` when the
        module-level ``clv_model`` is None.
    """
    if clv_model is None:
        return {"error": "CLV model not loaded"}

    # Order in which the features are passed to the model.
    feature_columns = [
        'frequency', 'recency', 'avg_quantity', 'avg_unit_price',
        'avg_transaction', 'lifespan_days', 'avg_days_between_purchases',
        'purchases_per_month', 'total_quantity'
    ]

    # Missing features default to 0 rather than raising.
    features = [features_dict.get(col, 0) for col in feature_columns]

    # Single sample: shape (1, number_of_features).
    features_array = np.array(features).reshape(1, -1)

    # Coerce to a built-in float: models may return NumPy scalars (for
    # example float32), which the json module cannot serialize. This also
    # mirrors the int() coercion used in predict_segment.
    prediction = float(clv_model.predict(features_array)[0])

    # Value buckets; thresholds are hard-coded here.
    if prediction < 500:
        category = "Low Value"
    elif prediction < 2000:
        category = "Medium Value"
    elif prediction < 5000:
        category = "High Value"
    else:
        category = "VIP"

    return {
        "predicted_clv": round(prediction, 2),
        "value_category": category
    }