File size: 1,707 Bytes
4216018
 
b4a7af8
a066869
4216018
 
 
 
 
 
 
 
 
 
d479508
4216018
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e5dd61
4216018
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import joblib
import pandas as pd
import os
import sklearn  # Needed for joblib to load sklearn models and transformers

def predict_from_row(row_dict):
    """
    Predict purchase intention for one Online Shoppers Purchasing Intention row.

    The row is validated, placed in a one-row DataFrame using the column order
    listed in ``feature_order``, label-encoded, reduced with the saved
    SelectKBest transformer, and passed to the saved decision-tree model.

    The three artifacts are read from the ``ai`` directory, resolved relative
    to the current working directory, on every call.

    Args:
        row_dict: Mapping from feature name to value. It must contain every
            name in ``feature_order``; extra keys are ignored.

    Returns:
        A ``(prediction, probabilities)`` tuple: the first element of
        ``predict`` and the first row of ``predict_proba`` for the input row.

    Raises:
        ValueError: If ``row_dict`` lacks one or more required features.
            Errors raised by ``joblib.load`` or by the loaded transformers
            and model propagate unchanged.
    """
    # All original features in order (from dataset description)
    feature_order = [
        'Administrative', 'Administrative_Duration', 'Informational', 'Informational_Duration',
        'ProductRelated', 'ProductRelated_Duration', 'BounceRates', 'ExitRates', 'PageValues',
        'SpecialDay', 'Month', 'OperatingSystems', 'Browser', 'Region', 'TrafficType',
        'VisitorType', 'Weekend'
    ]

    # Fail fast, before any artifact is read from disk: a missing key would
    # otherwise turn into NaN in the DataFrame and only fail further down
    # the pipeline with a far less specific message.
    missing = [name for name in feature_order if name not in row_dict]
    if missing:
        raise ValueError(
            "row_dict is missing required feature(s): " + ", ".join(missing)
        )

    # Load model and transformers.
    # NOTE: joblib.load unpickles its input, so these files must come from a
    # trusted source.
    model_path = os.path.join('ai', 'decision_tree_model.pkl')
    le_path = os.path.join('ai', 'label_encoder.pkl')
    kbest_path = os.path.join('ai', 'selectkbest_transformer.pkl')

    loaded_model = joblib.load(model_path)
    le = joblib.load(le_path)
    select_kbest = joblib.load(kbest_path)

    # Convert input to a one-row DataFrame; `columns` fixes the column order
    # and drops any extra keys.
    df = pd.DataFrame([row_dict], columns=feature_order)

    # Label encode categorical columns.
    # NOTE(review): the same encoder object is applied to three different
    # columns. This only works if it was fitted on the labels of all three;
    # confirm against the training code.
    for col in ('Month', 'VisitorType', 'Weekend'):
        df[col] = le.transform(df[col])

    # Apply SelectKBest (column order must match training X)
    X_kbest = select_kbest.transform(df)

    # Predict
    pred = loaded_model.predict(X_kbest)
    proba = loaded_model.predict_proba(X_kbest)
    return pred[0], proba[0]

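# Example usage (run from the directory that contains the `ai/` artifact folder):
# row = {'Administrative': 0, 'Administrative_Duration': 0.0, ..., 'Weekend': False}  # one value per feature
# prediction, probabilities = predict_from_row(row)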