from functools import lru_cache
import os

import joblib
import pandas as pd
import sklearn  # Needed for joblib to load sklearn models and transformers

# Locations of the persisted artifacts. These are relative paths, so they are
# resolved against the current working directory of the running process.
_MODEL_PATH = os.path.join('ai', 'decision_tree_model.pkl')
_LABEL_ENCODER_PATH = os.path.join('ai', 'label_encoder.pkl')
_KBEST_PATH = os.path.join('ai', 'selectkbest_transformer.pkl')

# All original features in order (from dataset description)
_FEATURE_ORDER = (
    'Administrative', 'Administrative_Duration',
    'Informational', 'Informational_Duration',
    'ProductRelated', 'ProductRelated_Duration',
    'BounceRates', 'ExitRates', 'PageValues', 'SpecialDay',
    'Month', 'OperatingSystems', 'Browser', 'Region', 'TrafficType',
    'VisitorType', 'Weekend',
)

# Columns that are passed through the label encoder before feature selection.
_CATEGORICAL_COLUMNS = ('Month', 'VisitorType', 'Weekend')


@lru_cache(maxsize=1)
def _load_artifacts():
    """Load the model, label encoder and SelectKBest transformer once.

    The result is cached for the lifetime of the process so repeated
    predictions do not re-read the pickle files. A failed load is not cached,
    so the next call retries.

    Returns:
        A ``(model, label_encoder, select_kbest)`` tuple.
    """
    # NOTE: joblib.load unpickles arbitrary Python objects; these files must
    # come from a trusted source.
    model = joblib.load(_MODEL_PATH)
    label_encoder = joblib.load(_LABEL_ENCODER_PATH)
    select_kbest = joblib.load(_KBEST_PATH)
    return model, label_encoder, select_kbest


def predict_from_row(row_dict: dict) -> tuple:
    """Predict the outcome for a single dataset row.

    Takes a dictionary representing a single row of the Online Shoppers
    Purchasing Intention dataset, applies preprocessing, and returns the
    model prediction and probabilities.

    Args:
        row_dict: Mapping from feature name to value. It must contain every
            name listed in ``_FEATURE_ORDER``; additional keys are ignored.

    Returns:
        A ``(prediction, probabilities)`` tuple holding the first element of
        the model's ``predict`` output and the first row of its
        ``predict_proba`` output.

    Raises:
        ValueError: If one or more required features are missing from
            ``row_dict``.
    """
    # Fail early with a clear message; otherwise pandas would fill the missing
    # columns with NaN and the error would surface later in a transformer.
    missing = [name for name in _FEATURE_ORDER if name not in row_dict]
    if missing:
        raise ValueError(
            "row_dict is missing required features: " + ", ".join(missing)
        )

    loaded_model, le, select_kbest = _load_artifacts()

    # Build a one-row frame whose columns follow the training feature order.
    df = pd.DataFrame([row_dict], columns=list(_FEATURE_ORDER))

    # Label encode categorical columns.
    # NOTE(review): the same encoder instance is applied to all three columns.
    # A LabelEncoder holds a single set of classes, so this presumably relies
    # on the persisted encoder having been fitted on values from all of these
    # columns - verify against the training code.
    for col in _CATEGORICAL_COLUMNS:
        df[col] = le.transform(df[col])

    # Apply SelectKBest (column order must match training X)
    X_kbest = select_kbest.transform(df)

    pred = loaded_model.predict(X_kbest)
    proba = loaded_model.predict_proba(X_kbest)
    return pred[0], proba[0]


# Example usage:
#     row = {...}  # one value for each of the 17 names in _FEATURE_ORDER
#     prediction, probabilities = predict_from_row(row)