Upload 6 files
Browse files- README.md +48 -9
- app.py +64 -0
- le_region.pkl +3 -0
- le_segment.pkl +3 -0
- model.pkl +3 -0
- requirements.txt +6 -0
README.md
CHANGED
|
@@ -1,12 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# High-Value Customer Predictor
|
| 2 |
+
|
| 3 |
+
Predict which customers are most likely to drive revenue — using purchase behavior, discount sensitivity, and profitability.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Problem Statement
|
| 8 |
+
E-commerce businesses want to identify high-value customers to drive targeted retention campaigns and improve profitability.
|
| 9 |
+
|
| 10 |
---
|
| 11 |
+
|
| 12 |
+
## 🧠 ML Pipeline
|
| 13 |
+
- ✅ Data cleaning + transformation with DuckDB
|
| 14 |
+
- ✅ Feature engineering (recency, frequency, discount behavior, profit margins, tenure)
|
| 15 |
+
- ✅ Binary classification: top 30% monetary value = high-value customer
|
| 16 |
+
- ✅ Random Forest with hyperparameter tuning + SHAP explainability
|
| 17 |
+
- ✅ Model evaluation: ROC, PR, confusion matrix, calibration
|
| 18 |
+
- ✅ Deployment with Streamlit UI + tunnel (localtunnel or ngrok)
|
| 19 |
+
- ✅ Versioned with MLflow and tested via pytest
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## Example Features
|
| 24 |
+
- recency_days: days since last purchase
|
| 25 |
+
- order_frequency_rate: monthly ordering rate
|
| 26 |
+
- rfm_score: recency-frequency-monetary customer score
|
| 27 |
+
- profit_margin_pct: profit-to-sales ratio
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
## 🧪 Model Performance
|
| 32 |
+
Metric | Score
|
| 33 |
+
-----------|-------
|
| 34 |
+
Accuracy | 0.97
|
| 35 |
+
F1-score | 0.95
|
| 36 |
+
ROC AUC | 0.99
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
## Try it Out (Local)
|
| 41 |
+
Run this in your terminal:
|
| 42 |
+
|
| 43 |
+
pip install streamlit pandas scikit-learn shap matplotlib
|
| 44 |
+
streamlit run app.py
|
| 45 |
+
|
| 46 |
---
|
| 47 |
|
| 48 |
+
## Future Improvements
|
| 49 |
+
- Cloud deployment (Render / Hugging Face Spaces)
|
| 50 |
+
- Real-time model monitoring (Prometheus + Grafana)
|
| 51 |
+
- Role-based authentication for Streamlit
|
app.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pickle
|
| 5 |
+
import shap
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
|
| 8 |
+
# Streamlit app: collect customer features, predict whether the customer is
# high-value, and explain the prediction with a SHAP waterfall plot.
#
# NOTE(review): several UI labels below start with "π". That glyph is the
# residue of an emoji whose bytes were lost in extraction; restore the intended
# emoji from the original file. Emoji that could be decoded unambiguously
# (📅, 💰, ⏳, 👤, ✅, ℹ️) have been restored.

# Page config goes first so that no other Streamlit command (including the
# spinner a cached loader may show) runs before it.
st.set_page_config(page_title="π High-Value Customer Predictor")


@st.cache_resource
def _load_artifact(path):
    """Unpickle the artifact at *path* once and reuse it across script reruns.

    SECURITY: pickle can execute arbitrary code while loading. Only load
    artifacts that ship with this app or come from a trusted source.
    """
    with open(path, "rb") as f:
        return pickle.load(f)


# Load model and encoders (cached: Streamlit reruns this script on every
# widget interaction, and re-reading the model each time is wasted work).
model = _load_artifact("model.pkl")
le_region = _load_artifact("le_region.pkl")
le_segment = _load_artifact("le_segment.pkl")

st.title("π High-Value Customer Predictor")
st.markdown("Enter customer details below to predict if they are high-value.")

# Layout inputs in two columns
col1, col2 = st.columns(2)
with col1:
    recency_days = st.number_input("📅 Recency (days since last purchase)", min_value=0, value=30)
    frequency = st.number_input("π Frequency (number of orders)", min_value=1, value=5)
    monetary_value = st.number_input("💰 Monetary Value (total sales)", min_value=0.0, value=1000.0)
    avg_order_value = st.number_input("π Average Order Value", min_value=0.0, value=200.0)

with col2:
    total_profit = st.number_input("π Total Profit", min_value=0.0, value=100.0)
    avg_days_between_orders = st.number_input("⏳ Avg Days Between Orders", min_value=0.0, value=30.0)
    region = st.selectbox("π Region", le_region.classes_)
    segment = st.selectbox("👤 Segment", le_segment.classes_)

# Encode categorical inputs with the same encoders that were pickled alongside
# the model; the select boxes only offer values the encoders already know.
region_enc = le_region.transform([region])[0]
segment_enc = le_segment.transform([segment])[0]

# Single-row frame for the model.
# NOTE(review): column names/order presumably match the training features —
# confirm against the training pipeline.
input_data = pd.DataFrame([[
    recency_days, frequency, monetary_value,
    avg_order_value, total_profit, avg_days_between_orders,
    region_enc, segment_enc,
]], columns=[
    'recency_days', 'frequency', 'monetary_value',
    'avg_order_value', 'total_profit', 'avg_days_between_orders',
    'region_enc', 'segment_enc',
])

if st.button("π Predict"):
    pred = model.predict(input_data)[0]
    # Probability of the class at index 1.
    # NOTE(review): assumes index 1 is the "high value" class — confirm
    # against model.classes_.
    proba = model.predict_proba(input_data)[0][1]

    if pred == 1:
        st.success(f"✅ Predicted HIGH VALUE with {proba:.2%} confidence.")
    else:
        # Confidence in the *predicted* (negative) class is the complement of
        # the positive-class probability, not the positive probability itself.
        st.info(f"ℹ️ Predicted NOT high value ({1 - proba:.2%} confidence).")

    # SHAP explanation
    explainer = shap.Explainer(model)
    shap_values = explainer(input_data)

    # Classifiers can yield one set of SHAP values per class, giving a
    # (rows, features, classes) array. The waterfall plot needs a single
    # 1-D explanation, so pick the first row and, if present, class index 1.
    if shap_values.values.ndim == 3:
        explanation = shap_values[0, :, 1]
    else:
        explanation = shap_values[0]

    st.subheader("π Feature Contribution (SHAP)")
    # Keep the figure creation adjacent to the plot call, as in the original:
    # the plot is expected to land on this figure.
    fig, _ = plt.subplots()
    shap.plots.waterfall(explanation, max_display=8, show=False)
    st.pyplot(fig)
    # Close the figure so pyplot does not accumulate one per click/rerun.
    plt.close(fig)
|
le_region.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d46f55b78a83417be6cf81cf6a6bed3269d03394d78235b25e79cb08eebc604
|
| 3 |
+
size 275
|
le_segment.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e63bd88dc4eb0ccd09167bfa798b8cd023aa886635dd2fe5b5a99df037a45a4e
|
| 3 |
+
size 280
|
model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b495865cb41d5ec3d268dad51ba9acf62a9204fc32bdf8fc37e441c59f5094b
|
| 3 |
+
size 1013510
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
scikit-learn
|
| 3 |
+
pandas
|
| 4 |
+
shap
|
| 5 |
+
matplotlib
|
| 6 |
+
pickle-mixin
|