Spaces:
Sleeping
Sleeping
File size: 1,707 Bytes
4216018 b4a7af8 a066869 4216018 d479508 4216018 9e5dd61 4216018 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | import joblib
import pandas as pd
import os
import sklearn # Needed for joblib to load sklearn models and transformers
# Deserialized artifacts are kept here so repeated predictions do not re-read
# the pickle files from disk on every call.
_ARTIFACT_CACHE = {}


def _load_artifacts():
    """Return ``(model, label_encoder, select_kbest)``, loading them from disk once."""
    if 'artifacts' not in _ARTIFACT_CACHE:
        # Paths are relative to the current working directory.
        model_path = os.path.join('ai', 'decision_tree_model.pkl')
        le_path = os.path.join('ai', 'label_encoder.pkl')
        kbest_path = os.path.join('ai', 'selectkbest_transformer.pkl')
        # NOTE: joblib.load unpickles arbitrary objects; these files must come
        # from a trusted source.
        _ARTIFACT_CACHE['artifacts'] = (
            joblib.load(model_path),
            joblib.load(le_path),
            joblib.load(kbest_path),
        )
    return _ARTIFACT_CACHE['artifacts']


def predict_from_row(row_dict):
    """
    Predict from a single row of the Online Shoppers Purchasing Intention dataset.

    The row is placed into a one-row DataFrame in the expected column order,
    the categorical columns are label-encoded, the saved SelectKBest
    transformer is applied, and the saved model is queried.

    Args:
        row_dict: Mapping from feature name to value. It must contain every
            name listed in ``feature_order`` below; extra keys are ignored.

    Returns:
        A ``(prediction, probabilities)`` tuple: the first element of
        ``model.predict(...)`` and the first row of ``model.predict_proba(...)``.

    Raises:
        ValueError: If ``row_dict`` lacks one or more required features.
    """
    # All original features in order (from dataset description)
    feature_order = [
        'Administrative', 'Administrative_Duration', 'Informational', 'Informational_Duration',
        'ProductRelated', 'ProductRelated_Duration', 'BounceRates', 'ExitRates', 'PageValues',
        'SpecialDay', 'Month', 'OperatingSystems', 'Browser', 'Region', 'TrafficType',
        'VisitorType', 'Weekend'
    ]

    # Fail fast with a clear message: a missing key would otherwise become a
    # NaN column in the DataFrame and only surface inside the transformers.
    missing = [name for name in feature_order if name not in row_dict]
    if missing:
        raise ValueError(
            f"row_dict is missing required feature(s): {', '.join(missing)}"
        )

    loaded_model, le, select_kbest = _load_artifacts()

    # Convert input to a one-row DataFrame; `columns` fixes the column order
    # and drops any keys that are not model features.
    df = pd.DataFrame([row_dict], columns=feature_order)

    # Label encode categorical columns.
    # NOTE(review): a single encoder is applied to three different columns.
    # This only works if the saved encoder knows the labels of all three;
    # confirm against the training code (per-column encoders may be needed).
    for col in ['Month', 'VisitorType', 'Weekend']:
        df[col] = le.transform(df[col])

    # Apply SelectKBest (column order must match training X)
    X_kbest = select_kbest.transform(df)

    # Predict
    pred = loaded_model.predict(X_kbest)
    proba = loaded_model.predict_proba(X_kbest)
    return pred[0], proba[0]
# Example usage:
# row = {...}  # one value for each of the 17 feature names listed in feature_order
# prediction, probabilities = predict_from_row(row)