Spaces:
Sleeping
Sleeping
| import joblib | |
| import pandas as pd | |
| import os | |
| import sklearn # Needed for joblib to load sklearn models and transformers | |
def predict_from_row(row_dict):
    """Predict purchase intention for a single dataset row.

    Takes a dictionary representing one row of the Online Shoppers Purchasing
    Intention dataset, applies the saved preprocessing steps (label encoding
    followed by SelectKBest), and returns the model prediction together with
    the class probabilities.

    Args:
        row_dict: Mapping from feature name to value. It must provide every
            column listed in ``feature_order`` below; extra keys are ignored.

    Returns:
        A ``(prediction, probabilities)`` tuple: the first element of
        ``predict`` and the first row of ``predict_proba`` for the given row.

    Raises:
        ValueError: If ``row_dict`` is dict-like and lacks one or more of the
            expected feature names.
    """
    # All original features in order (from dataset description).
    feature_order = [
        'Administrative', 'Administrative_Duration', 'Informational', 'Informational_Duration',
        'ProductRelated', 'ProductRelated_Duration', 'BounceRates', 'ExitRates', 'PageValues',
        'SpecialDay', 'Month', 'OperatingSystems', 'Browser', 'Region', 'TrafficType',
        'VisitorType', 'Weekend',
    ]

    # Fail fast on incomplete input. pandas would otherwise fill every absent
    # key with NaN, and the problem would only surface later inside the
    # encoder/selector with a message unrelated to the real cause.
    # Only key-addressable inputs (dict, Series, ...) are checked so that any
    # other input pandas accepts keeps working as before.
    if hasattr(row_dict, 'keys'):
        missing = [name for name in feature_order if name not in row_dict]
        if missing:
            raise ValueError(
                "row_dict is missing required feature(s): " + ", ".join(missing)
            )

    # Load model and transformers (paths are relative to the working directory).
    # SECURITY: joblib.load unpickles arbitrary objects; these files must come
    # from a trusted source.
    # NOTE: the artifacts are re-read from disk on every call.
    model_path = os.path.join('ai', 'decision_tree_model.pkl')
    le_path = os.path.join('ai', 'label_encoder.pkl')
    kbest_path = os.path.join('ai', 'selectkbest_transformer.pkl')
    loaded_model = joblib.load(model_path)
    le = joblib.load(le_path)
    select_kbest = joblib.load(kbest_path)

    # Convert input to a one-row DataFrame with the columns in training order.
    df = pd.DataFrame([row_dict], columns=feature_order)

    # Label encode categorical columns.
    # NOTE(review): the same encoder object is applied to all three columns.
    # If it is a scikit-learn LabelEncoder it only knows the classes of a
    # single fit -- confirm the saved object really covers Month, VisitorType
    # and Weekend.
    for col in ['Month', 'VisitorType', 'Weekend']:
        df[col] = le.transform(df[col])

    # Apply SelectKBest (column order must match training X).
    X_kbest = select_kbest.transform(df)

    # Predict the class and the per-class probabilities for the single row.
    pred = loaded_model.predict(X_kbest)
    proba = loaded_model.predict_proba(X_kbest)
    return pred[0], proba[0]
| # Example usage: | |
| # row = {...}  # one value for each of the 17 feature names in feature_order | |
| # prediction, probabilities = predict_from_row(row) | |
| # print(prediction, probabilities) | |