Spaces:
Sleeping
Sleeping
Update project structure and add new pages
Browse files- requirements.txt +0 -1
- src/pages/about.py +44 -0
- src/pages/dashboard.py +99 -0
- src/pages/predict.py +525 -0
- src/static/styles.css +21 -0
requirements.txt
CHANGED
|
@@ -8,7 +8,6 @@ plotly
|
|
| 8 |
requests
|
| 9 |
scikit-learn
|
| 10 |
imbalanced-learn
|
| 11 |
-
pickle
|
| 12 |
joblib
|
| 13 |
onnx
|
| 14 |
skl2onnx
|
|
|
|
| 8 |
requests
|
| 9 |
scikit-learn
|
| 10 |
imbalanced-learn
|
|
|
|
| 11 |
joblib
|
| 12 |
onnx
|
| 13 |
skl2onnx
|
src/pages/about.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# About page: author profile with a photo header, experience summary,
# and a hard-skills list rendered as markdown bullet blocks.
import streamlit as st

st.title('Author Project')

# Two-column header: portrait on the left, name and tagline on the right.
col1, col2 = st.columns(2, gap='small', vertical_alignment='center')
with col1:
    st.image("images/Fendy.png", width=250)
with col2:
    st.title("Fendy Hendriyanto", anchor=False)
    st.write("AI Engineer and Instructor")
    st.write("Assisting and mentoring students to help analyze and supporting data driven with creativity and decision making.")

# --- EXPERIENCE & QUALIFICATIONS ------
st.write("\n")
st.subheader("Experience and Qualifications", anchor=False)
st.write(
    """
    - 2 years experience coaching and mentoring about Artificial Intelligence
    - Strong hands-on experience and knowledge in Python and Data Science
    - Proficient in using various libraries and tools such as TensorFlow, Keras, Scikit-learn, OpenCV, Pandas
    - Good understanding and analyzing of statistical principles and their perspective applications
    - Excellent team player and initiative on tasks

    """
)

# ---- SKILLS ----
st.write("\n")
st.subheader("Hard Skills", anchor=False)
st.write(
    """
    - Programming : Python (Pandas, Scikit-learn, Scikit-image), R, SQL, JavaScript
    - Data Visualization : Tableau, Spreadsheet, Excel
    - Modelling : Tensorflow, Keras, PyCaret, XGBoost, CometML
    - Databases : MySQL, PostgreSQL, SQLite
    - Deployment : Streamlit, Flask, Gradio, Huggingface, Git
    - Frameworks : OpenCV, NLTK

    """
)
|
src/pages/dashboard.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Dashboard page: exploratory analysis of the Turkey customer retail dataset.

Renders the raw data table with a CSV download button, the category/payment
distribution charts, and the per-mall revenue and sales bar charts.
"""
import pandas as pd
import seaborn as sns
import streamlit as st
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

st.title("Dashboard Analysis Customer Retail")

# Load CSS style
with open('static/styles.css') as f:
    st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

# Load dataset — cached so the CSV is parsed only once per session.
@st.cache_data
def load_data():
    data = pd.read_csv('data/customer_shopping_data.csv')
    return data

data = load_data()

with st.expander("HASIL DATA"):
    # Rename raw snake_case columns to display-friendly CamelCase names.
    data = pd.DataFrame({
        'InvoiceNo': data['invoice_no'],
        'CustomerID': data['customer_id'],
        'Gender': data['gender'],
        'Age': data['age'],
        'Category': data['category'],
        'Quantity': data['quantity'],
        'Price': data['price'],
        'PaymentMethod': data['payment_method'],
        'InvoiceDate': data['invoice_date'],
        'ShoppingMall': data['shopping_mall']
    })
    st.dataframe(data, use_container_width=True)

# Download Dataset
download = data.to_csv(index=False).encode('utf-8')
st.download_button(label="DOWNLOAD DATASET",
                   data=download,
                   key='download_data.csv',
                   file_name='dataset_retail.csv')

# Visualization
with st.expander("DISTRIBUSI KATEGORI DAN PEMBAYARAN"):
    col1, col2 = st.columns(2)
    with col1:
        data_quantity = data.groupby('Category')['Quantity'].sum()
        # Plot Pie Chart
        plt.figure(figsize=(10, 8))
        plt.pie(data_quantity.values, labels=data_quantity.index,
                autopct='%1.1f%%', colors=sns.color_palette("pastel"))
        # Title
        plt.title('Kuantitas Produk Berdasarkan Kategori', fontsize=16)
        st.pyplot(plt)

    with col2:
        payment_counts = data['PaymentMethod'].value_counts()
        fig = px.bar(x=payment_counts.index, y=payment_counts.values,
                     labels={'x': 'Metode Pembayaran', 'y': 'Jumlah Transaksi'},
                     color=payment_counts.index)
        fig.update_layout(font_size=14)
        # update_layout mutates `fig` in place; its return value need not be kept
        # (the original bound it to an unused `title` variable).
        fig.update_layout(title={'text': 'Distribusi Metode Pembayaran',
                                 'xanchor': 'center',
                                 'yanchor': 'top',
                                 'x': 0.5,
                                 'y': 0.95})

        st.plotly_chart(fig, use_container_width=True)

    st.write("<b>NOTES</b>: Distribusi dalam kategori berdasarkan kuantitas kategori produk yang sering dibeli oleh pelanggan adalah baju, kosmetik dan F&B. Sedangkan, metode pembayaran dengan transaksi terbanyak adalah Cash dan Credit.", unsafe_allow_html=True)

with st.expander("TOTAL PENDAPATAN DAN PENJUALAN"):
    col1, col2 = st.columns(2)
    with col1:
        total_revenue = data.groupby('ShoppingMall')['Price'].sum()
        fig = px.bar(x=total_revenue.index, y=total_revenue.values,
                     labels={'x': 'Mall', 'y': 'Total Pendapatan'},
                     color=total_revenue.index)
        fig.update_layout(title={'text': 'Total Pendapatan Setiap Pusat Perbelanjaan',
                                 'xanchor': 'center',
                                 'yanchor': 'top',
                                 'x': 0.5,
                                 'y': 0.95})

        st.plotly_chart(fig, use_container_width=True)

    with col2:
        total_sales = data.groupby('ShoppingMall')['Quantity'].sum().sort_values(ascending=False)
        fig = px.bar(x=total_sales.index, y=total_sales.values,
                     labels={'x': 'Mall', 'y': 'Total Penjualan'},
                     color=total_sales.index)
        fig.update_layout(title={'text': 'Total Penjualan Setiap Pusat Perbelanjaan',
                                 'xanchor': 'center',
                                 'yanchor': 'top',
                                 'x': 0.5,
                                 'y': 0.95})

        st.plotly_chart(fig, use_container_width=True)
    st.write("<b>NOTES</b>: Pusat perbelanjaan dengan total pendapatan dan penjualan tertinggi adalah Mall of Istanbul, diikuti oleh Mall Kanyon dan Mall Metrocity.", unsafe_allow_html=True)
|
src/pages/predict.py
ADDED
|
@@ -0,0 +1,525 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pickle
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import seaborn as sns
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import onnxruntime as ort
|
| 8 |
+
import plotly.express as px
|
| 9 |
+
from scipy.stats import zscore
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
from skl2onnx import convert_sklearn
|
| 12 |
+
from sklearn.feature_selection import RFE
|
| 13 |
+
from sklearn.ensemble import RandomForestClassifier
|
| 14 |
+
from sklearn.model_selection import train_test_split
|
| 15 |
+
from skl2onnx.common.data_types import FloatTensorType
|
| 16 |
+
from streamlit_extras.metric_cards import style_metric_cards
|
| 17 |
+
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
| 18 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
|
| 19 |
+
|
| 20 |
+
st.title("Customer Category Prediction (Case: Turkey Customer)")
|
| 21 |
+
st.write("Prediction Customer in Turkey with Probability Using Ensemble Technique Based")
|
| 22 |
+
|
| 23 |
+
# Load CSS style
|
| 24 |
+
with open('static/styles.css') as f:
|
| 25 |
+
st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
|
| 26 |
+
|
| 27 |
+
# Load Dataset
|
| 28 |
+
retail = pd.read_csv('data/customer_shopping_data.csv')
|
| 29 |
+
|
| 30 |
+
X = retail.loc[:, ['age', 'gender', 'price', 'payment_method', 'shopping_mall']]
|
| 31 |
+
y = retail[['category']]
|
| 32 |
+
|
| 33 |
+
# Encode categorical variables
|
| 34 |
+
le = LabelEncoder()
|
| 35 |
+
X['gender'] = le.fit_transform(X['gender'])
|
| 36 |
+
X['payment_method'] = le.fit_transform(X['payment_method'])
|
| 37 |
+
X['shopping_mall'] = le.fit_transform(X['shopping_mall'])
|
| 38 |
+
y_encoded = le.fit_transform(y)
|
| 39 |
+
|
| 40 |
+
# Splitting data
|
| 41 |
+
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=44)
|
| 42 |
+
|
| 43 |
+
# Preprocessing
|
| 44 |
+
scaler = StandardScaler()
|
| 45 |
+
X_train_scaled = scaler.fit_transform(X_train)
|
| 46 |
+
X_test_scaled = scaler.transform(X_test)
|
| 47 |
+
|
| 48 |
+
# Outlier detection using Z-Score
|
| 49 |
+
z_scores = np.abs(zscore(X_train_scaled))
|
| 50 |
+
threshold = 5
|
| 51 |
+
outliers = np.where(z_scores > threshold)
|
| 52 |
+
|
| 53 |
+
X_train_clean = X_train_scaled[(z_scores < threshold).all(axis=1)]
|
| 54 |
+
y_train_clean = y_train[(z_scores < threshold).all(axis=1)]
|
| 55 |
+
|
| 56 |
+
#------------ MODEL TRAINING SECTION ---------
|
| 57 |
+
with st.expander("๐ MODEL TRAINING & MANAGEMENT"):
|
| 58 |
+
st.subheader("Train or Load Model")
|
| 59 |
+
|
| 60 |
+
col1, col2 = st.columns(2)
|
| 61 |
+
|
| 62 |
+
with col1:
|
| 63 |
+
st.write("### Training Parameters")
|
| 64 |
+
n_estimators = st.slider("Number of Trees (n_estimators)",
|
| 65 |
+
min_value=50, max_value=500, value=300, step=50)
|
| 66 |
+
test_size = st.slider("Test Size",
|
| 67 |
+
min_value=0.1, max_value=0.4, value=0.2, step=0.05)
|
| 68 |
+
random_state = st.number_input("Random State",
|
| 69 |
+
min_value=0, max_value=100, value=44)
|
| 70 |
+
n_features = st.slider("Number of Features to Select (RFE)",
|
| 71 |
+
min_value=1, max_value=5, value=5)
|
| 72 |
+
|
| 73 |
+
train_button = st.button("๐ TRAIN NEW MODEL", type="primary")
|
| 74 |
+
|
| 75 |
+
with col2:
|
| 76 |
+
st.write("### Model Management")
|
| 77 |
+
model_format = st.radio("Choose Model Format:",
|
| 78 |
+
["ONNX Model (.onnx)", "Pickle Model (.pkl)"])
|
| 79 |
+
|
| 80 |
+
load_option = st.radio("Choose Model Source:",
|
| 81 |
+
["Load Existing Model", "Use Newly Trained Model"])
|
| 82 |
+
|
| 83 |
+
if load_option == "Load Existing Model":
|
| 84 |
+
if model_format == "ONNX Model (.onnx)":
|
| 85 |
+
model_path = 'model/best_model_rf.onnx'
|
| 86 |
+
metadata_path = 'model/model_metadata.pkl'
|
| 87 |
+
if os.path.exists(model_path) and os.path.exists(metadata_path):
|
| 88 |
+
st.success("โ
ONNX model found!")
|
| 89 |
+
model_loaded = True
|
| 90 |
+
use_onnx = True
|
| 91 |
+
else:
|
| 92 |
+
st.error("โ ONNX model not found. Please train a new model first.")
|
| 93 |
+
model_loaded = False
|
| 94 |
+
use_onnx = False
|
| 95 |
+
else:
|
| 96 |
+
model_path = 'model/best_model_rf.pkl'
|
| 97 |
+
if os.path.exists(model_path):
|
| 98 |
+
st.success("โ
Pickle model found!")
|
| 99 |
+
model_loaded = True
|
| 100 |
+
use_onnx = False
|
| 101 |
+
else:
|
| 102 |
+
st.error("โ Pickle model not found. Please train a new model first.")
|
| 103 |
+
model_loaded = False
|
| 104 |
+
use_onnx = False
|
| 105 |
+
else:
|
| 106 |
+
model_loaded = False
|
| 107 |
+
use_onnx = False
|
| 108 |
+
|
| 109 |
+
# Initialize session state for model
|
| 110 |
+
if 'trained_model' not in st.session_state:
|
| 111 |
+
st.session_state.trained_model = None
|
| 112 |
+
st.session_state.trained_rfe = None
|
| 113 |
+
st.session_state.trained_scaler = None
|
| 114 |
+
st.session_state.trained_le = None
|
| 115 |
+
st.session_state.model_metrics = None
|
| 116 |
+
st.session_state.onnx_session = None
|
| 117 |
+
|
| 118 |
+
# Train new model
|
| 119 |
+
if train_button:
|
| 120 |
+
with st.spinner("Training model... Please wait..."):
|
| 121 |
+
# Re-split data with new test_size
|
| 122 |
+
X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(
|
| 123 |
+
X, y_encoded, test_size=test_size, random_state=random_state
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
# Preprocessing
|
| 127 |
+
scaler_new = StandardScaler()
|
| 128 |
+
X_train_scaled_new = scaler_new.fit_transform(X_train_new)
|
| 129 |
+
X_test_scaled_new = scaler_new.transform(X_test_new)
|
| 130 |
+
|
| 131 |
+
# Outlier removal
|
| 132 |
+
z_scores_new = np.abs(zscore(X_train_scaled_new))
|
| 133 |
+
X_train_clean_new = X_train_scaled_new[(z_scores_new < threshold).all(axis=1)]
|
| 134 |
+
y_train_clean_new = y_train_new[(z_scores_new < threshold).all(axis=1)]
|
| 135 |
+
|
| 136 |
+
# Model training with RFE
|
| 137 |
+
classifier_new = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
|
| 138 |
+
rfe_new = RFE(classifier_new, n_features_to_select=n_features)
|
| 139 |
+
X_train_rfe = rfe_new.fit_transform(X_train_clean_new, y_train_clean_new)
|
| 140 |
+
X_test_rfe = rfe_new.transform(X_test_scaled_new)
|
| 141 |
+
|
| 142 |
+
# Fit the model
|
| 143 |
+
classifier_new.fit(X_train_rfe, y_train_clean_new)
|
| 144 |
+
|
| 145 |
+
# Predictions
|
| 146 |
+
y_pred_new = classifier_new.predict(X_test_rfe)
|
| 147 |
+
|
| 148 |
+
# Calculate metrics
|
| 149 |
+
metrics = {
|
| 150 |
+
'accuracy': accuracy_score(y_test_new, y_pred_new),
|
| 151 |
+
'precision': precision_score(y_test_new, y_pred_new, average='weighted'),
|
| 152 |
+
'recall': recall_score(y_test_new, y_pred_new, average='weighted'),
|
| 153 |
+
'f1_score': f1_score(y_test_new, y_pred_new, average='weighted')
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
# Save to session state
|
| 157 |
+
st.session_state.trained_model = classifier_new
|
| 158 |
+
st.session_state.trained_rfe = rfe_new
|
| 159 |
+
st.session_state.trained_scaler = scaler_new
|
| 160 |
+
st.session_state.trained_le = le
|
| 161 |
+
st.session_state.model_metrics = metrics
|
| 162 |
+
st.session_state.X_test = X_test_rfe
|
| 163 |
+
st.session_state.y_test = y_test_new
|
| 164 |
+
st.session_state.y_pred = y_pred_new
|
| 165 |
+
|
| 166 |
+
# Save as Pickle
|
| 167 |
+
model_package = {
|
| 168 |
+
'classifier': classifier_new,
|
| 169 |
+
'rfe': rfe_new,
|
| 170 |
+
'scaler': scaler_new,
|
| 171 |
+
'label_encoder': le,
|
| 172 |
+
'metrics': metrics,
|
| 173 |
+
'n_features': n_features
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
with open('model/best_model_rf.pkl', 'wb') as f:
|
| 177 |
+
pickle.dump(model_package, f)
|
| 178 |
+
|
| 179 |
+
# Convert and Save as ONNX
|
| 180 |
+
try:
|
| 181 |
+
# Define initial type for ONNX conversion
|
| 182 |
+
initial_type = [('float_input', FloatTensorType([None, n_features]))]
|
| 183 |
+
|
| 184 |
+
# Convert model to ONNX
|
| 185 |
+
onnx_model = convert_sklearn(classifier_new, initial_types=initial_type,
|
| 186 |
+
target_opset=12)
|
| 187 |
+
|
| 188 |
+
# Save ONNX model
|
| 189 |
+
with open('model/best_model_rf.onnx', 'wb') as f:
|
| 190 |
+
f.write(onnx_model.SerializeToString())
|
| 191 |
+
|
| 192 |
+
# Save metadata (scaler, rfe, label_encoder) separately
|
| 193 |
+
metadata = {
|
| 194 |
+
'scaler': scaler_new,
|
| 195 |
+
'rfe': rfe_new,
|
| 196 |
+
'label_encoder': le,
|
| 197 |
+
'metrics': metrics,
|
| 198 |
+
'n_features': n_features,
|
| 199 |
+
'feature_names': ['age', 'gender', 'price', 'payment_method', 'shopping_mall']
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
with open('model/model_metadata.pkl', 'wb') as f:
|
| 203 |
+
pickle.dump(metadata, f)
|
| 204 |
+
|
| 205 |
+
st.success(f"โ
Model trained and saved successfully!")
|
| 206 |
+
st.success(f"๐ Accuracy: {metrics['accuracy']:.4f}")
|
| 207 |
+
st.success(f"๐พ Saved as: Pickle (.pkl) and ONNX (.onnx)")
|
| 208 |
+
|
| 209 |
+
except Exception as e:
|
| 210 |
+
st.warning(f"โ ๏ธ Model saved as Pickle only. ONNX conversion failed: {str(e)}")
|
| 211 |
+
|
| 212 |
+
st.balloons()
|
| 213 |
+
|
| 214 |
+
# Determine which model to use
|
| 215 |
+
if load_option == "Use Newly Trained Model" and st.session_state.trained_model is not None:
|
| 216 |
+
classifier = st.session_state.trained_model
|
| 217 |
+
rfe = st.session_state.trained_rfe
|
| 218 |
+
scaler = st.session_state.trained_scaler
|
| 219 |
+
le_model = st.session_state.trained_le
|
| 220 |
+
X_test_final = st.session_state.X_test
|
| 221 |
+
y_test_final = st.session_state.y_test
|
| 222 |
+
y_pred_final = st.session_state.y_pred
|
| 223 |
+
|
| 224 |
+
accuracy = st.session_state.model_metrics['accuracy']
|
| 225 |
+
precision = st.session_state.model_metrics['precision']
|
| 226 |
+
recall = st.session_state.model_metrics['recall']
|
| 227 |
+
f1 = st.session_state.model_metrics['f1_score']
|
| 228 |
+
|
| 229 |
+
onnx_session = None
|
| 230 |
+
st.info("๐ต Using newly trained model from this session")
|
| 231 |
+
|
| 232 |
+
elif model_loaded and use_onnx:
|
| 233 |
+
# Load ONNX Model
|
| 234 |
+
try:
|
| 235 |
+
onnx_session = ort.InferenceSession('model/best_model_rf.onnx')
|
| 236 |
+
|
| 237 |
+
# Load metadata
|
| 238 |
+
with open('model/model_metadata.pkl', 'rb') as f:
|
| 239 |
+
metadata = pickle.load(f)
|
| 240 |
+
|
| 241 |
+
scaler = metadata['scaler']
|
| 242 |
+
rfe = metadata['rfe']
|
| 243 |
+
le_model = metadata['label_encoder']
|
| 244 |
+
metrics = metadata.get('metrics', {})
|
| 245 |
+
|
| 246 |
+
# Apply transformations
|
| 247 |
+
X_train_rfe = rfe.fit_transform(X_train_clean, y_train_clean)
|
| 248 |
+
X_test_final = rfe.transform(X_test_scaled)
|
| 249 |
+
|
| 250 |
+
# Predict using ONNX
|
| 251 |
+
input_name = onnx_session.get_inputs()[0].name
|
| 252 |
+
label_name = onnx_session.get_outputs()[0].name
|
| 253 |
+
|
| 254 |
+
y_pred_final = onnx_session.run([label_name], {input_name: X_test_final.astype(np.float32)})[0]
|
| 255 |
+
y_test_final = y_test
|
| 256 |
+
|
| 257 |
+
# Calculate metrics
|
| 258 |
+
accuracy = metrics.get('accuracy', accuracy_score(y_test_final, y_pred_final))
|
| 259 |
+
precision = metrics.get('precision', precision_score(y_test_final, y_pred_final, average='weighted'))
|
| 260 |
+
recall = metrics.get('recall', recall_score(y_test_final, y_pred_final, average='weighted'))
|
| 261 |
+
f1 = metrics.get('f1_score', f1_score(y_test_final, y_pred_final, average='weighted'))
|
| 262 |
+
|
| 263 |
+
classifier = None # ONNX doesn't need sklearn classifier
|
| 264 |
+
|
| 265 |
+
st.info("๐ข Using ONNX model from file")
|
| 266 |
+
|
| 267 |
+
except Exception as e:
|
| 268 |
+
st.error(f"Failed to load ONNX model: {str(e)}")
|
| 269 |
+
st.warning("Falling back to default model...")
|
| 270 |
+
model_loaded = False
|
| 271 |
+
use_onnx = False
|
| 272 |
+
onnx_session = None
|
| 273 |
+
|
| 274 |
+
elif model_loaded and not use_onnx:
|
| 275 |
+
# Load Pickle Model
|
| 276 |
+
with open('model/best_model_rf.pkl', 'rb') as f:
|
| 277 |
+
model_data = pickle.load(f)
|
| 278 |
+
|
| 279 |
+
if isinstance(model_data, dict):
|
| 280 |
+
classifier = model_data['classifier']
|
| 281 |
+
rfe = model_data.get('rfe', None)
|
| 282 |
+
scaler = model_data.get('scaler', scaler)
|
| 283 |
+
le_model = model_data.get('label_encoder', le)
|
| 284 |
+
|
| 285 |
+
if rfe is None:
|
| 286 |
+
rfe = RFE(classifier, n_features_to_select=5)
|
| 287 |
+
|
| 288 |
+
# Apply transformations
|
| 289 |
+
X_train_rfe = rfe.fit_transform(X_train_clean, y_train_clean)
|
| 290 |
+
X_test_final = rfe.transform(X_test_scaled)
|
| 291 |
+
classifier.fit(X_train_rfe, y_train_clean)
|
| 292 |
+
y_pred_final = classifier.predict(X_test_final)
|
| 293 |
+
y_test_final = y_test
|
| 294 |
+
|
| 295 |
+
# Calculate metrics
|
| 296 |
+
accuracy = accuracy_score(y_test_final, y_pred_final)
|
| 297 |
+
precision = precision_score(y_test_final, y_pred_final, average='weighted')
|
| 298 |
+
recall = recall_score(y_test_final, y_pred_final, average='weighted')
|
| 299 |
+
f1 = f1_score(y_test_final, y_pred_final, average='weighted')
|
| 300 |
+
else:
|
| 301 |
+
classifier = model_data
|
| 302 |
+
le_model = le
|
| 303 |
+
|
| 304 |
+
if hasattr(classifier, 'named_steps') or hasattr(classifier, 'steps'):
|
| 305 |
+
y_pred_final = classifier.predict(X_test)
|
| 306 |
+
y_test_final = y_test
|
| 307 |
+
X_test_final = X_test_scaled
|
| 308 |
+
rfe = None
|
| 309 |
+
else:
|
| 310 |
+
rfe = RFE(classifier, n_features_to_select=5)
|
| 311 |
+
X_train_rfe = rfe.fit_transform(X_train_clean, y_train_clean)
|
| 312 |
+
X_test_final = rfe.transform(X_test_scaled)
|
| 313 |
+
classifier.fit(X_train_rfe, y_train_clean)
|
| 314 |
+
y_pred_final = classifier.predict(X_test_final)
|
| 315 |
+
y_test_final = y_test
|
| 316 |
+
|
| 317 |
+
accuracy = accuracy_score(y_test_final, y_pred_final)
|
| 318 |
+
precision = precision_score(y_test_final, y_pred_final, average='weighted')
|
| 319 |
+
recall = recall_score(y_test_final, y_pred_final, average='weighted')
|
| 320 |
+
f1 = f1_score(y_test_final, y_pred_final, average='weighted')
|
| 321 |
+
|
| 322 |
+
onnx_session = None
|
| 323 |
+
st.info("๐ข Using Pickle model from file")
|
| 324 |
+
|
| 325 |
+
else:
|
| 326 |
+
# Default: train on the fly
|
| 327 |
+
classifier = RandomForestClassifier(n_estimators=300, random_state=44)
|
| 328 |
+
rfe = RFE(classifier, n_features_to_select=5)
|
| 329 |
+
X_train_rfe = rfe.fit_transform(X_train_clean, y_train_clean)
|
| 330 |
+
X_test_final = rfe.transform(X_test_scaled)
|
| 331 |
+
classifier.fit(X_train_rfe, y_train_clean)
|
| 332 |
+
y_pred_final = classifier.predict(X_test_final)
|
| 333 |
+
y_test_final = y_test
|
| 334 |
+
le_model = le
|
| 335 |
+
|
| 336 |
+
accuracy = accuracy_score(y_test_final, y_pred_final)
|
| 337 |
+
precision = precision_score(y_test_final, y_pred_final, average='weighted')
|
| 338 |
+
recall = recall_score(y_test_final, y_pred_final, average='weighted')
|
| 339 |
+
f1 = f1_score(y_test_final, y_pred_final, average='weighted')
|
| 340 |
+
|
| 341 |
+
onnx_session = None
|
| 342 |
+
st.warning("โ ๏ธ Using default model (trained on-the-fly)")
|
| 343 |
+
|
| 344 |
+
# Evaluation Metrics
|
| 345 |
+
with st.expander("๐ EVALUATION METRICS"):
|
| 346 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 347 |
+
col1.metric("ACCURACY", value=f'{accuracy:.4f}', delta='Accuracy Score')
|
| 348 |
+
col2.metric("PRECISION", value=f'{precision:.4f}', delta='Precision Score With Weighted Average')
|
| 349 |
+
col3.metric("RECALL", value=f'{recall:.4f}', delta='Recall Score With Weighted Average')
|
| 350 |
+
col4.metric("F1 SCORE", value=f'{f1:.4f}', delta='F1 Score with Weighted Average')
|
| 351 |
+
style_metric_cards(background_color='#FFFFFF', border_left_color='#9900AD', border_color='#1F66BD', box_shadow='#F71938')
|
| 352 |
+
st.write(f"<b>NOTES</b>: Hasil evaluasi metriks yang diterapkan sangat baik dan sudah sesuai dengan hasil pelatihan model algoritma Random Forest.", unsafe_allow_html=True)
|
| 353 |
+
|
| 354 |
+
# Prediction Table
|
| 355 |
+
with st.expander("๐ PREDICTION TABLE"):
|
| 356 |
+
prediction_table = pd.DataFrame({
|
| 357 |
+
'age': X_test_final[:, 0].ravel(),
|
| 358 |
+
'gender': X_test_final[:, 1].ravel(),
|
| 359 |
+
'price': X_test_final[:, 2].ravel(),
|
| 360 |
+
'payment_method': X_test_final[:, 3].ravel(),
|
| 361 |
+
'shopping_mall': X_test_final[:, 4].ravel(),
|
| 362 |
+
'Category | Actual Y': y_test_final.ravel(),
|
| 363 |
+
'Y_Predicted': y_pred_final.ravel(),
|
| 364 |
+
'Accuracy': [accuracy] * len(y_test_final),
|
| 365 |
+
'Precision': [precision] * len(y_test_final),
|
| 366 |
+
'Recall': [recall] * len(y_test_final),
|
| 367 |
+
'F1 Score': [f1] * len(y_test_final)
|
| 368 |
+
})
|
| 369 |
+
|
| 370 |
+
st.dataframe(prediction_table, use_container_width=True)
|
| 371 |
+
st.write(f'<b>NOTES</b>: Pada bagian tabel prediksi ini menggunakan data yang telah diolah sebelumnya sehingga sangat berbeda dengan data asli.', unsafe_allow_html=True)
|
| 372 |
+
|
| 373 |
+
# Download Predicted Table in CSV
|
| 374 |
+
df_predict = prediction_table.to_csv(index=False).encode('utf-8')
|
| 375 |
+
st.download_button(label="๐ฅ DOWNLOAD PREDICTED DATA",
|
| 376 |
+
data=df_predict,
|
| 377 |
+
key="download_predict.csv",
|
| 378 |
+
file_name='data_predict.csv')
|
| 379 |
+
|
| 380 |
+
# Confusion Matrix and Feature Importance
|
| 381 |
+
with st.expander("๐ CONFUSION MATRIX & FEATURE IMPORTANCE"):
|
| 382 |
+
col1, col2 = st.columns(2)
|
| 383 |
+
with col1:
|
| 384 |
+
target_names = ['Books', 'Clothing', 'Cosmetics', 'Food & Beverage',
|
| 385 |
+
'Shoes', 'Souvenir', 'Technology', 'Toys']
|
| 386 |
+
cm = confusion_matrix(y_test_final, y_pred_final)
|
| 387 |
+
plt.figure(figsize=(15, 8))
|
| 388 |
+
sns.heatmap(cm, annot=True, cmap='Blues', fmt='d', xticklabels=target_names, yticklabels=target_names)
|
| 389 |
+
plt.title('Confusion Matrix Customer Category Prediction')
|
| 390 |
+
plt.xlabel('Predicted labels')
|
| 391 |
+
plt.ylabel('True labels')
|
| 392 |
+
st.pyplot(fig=plt, use_container_width=True)
|
| 393 |
+
|
| 394 |
+
# Ganti bagian Feature Importance (sekitar baris 390-410) dengan kode ini:
|
| 395 |
+
with col2:
|
| 396 |
+
# Feature Importance only available for sklearn models, not ONNX
|
| 397 |
+
if classifier is not None:
|
| 398 |
+
try:
|
| 399 |
+
# Check if classifier is a Pipeline
|
| 400 |
+
if hasattr(classifier, 'named_steps'):
|
| 401 |
+
# Try common pipeline step names
|
| 402 |
+
if 'randomforestclassifier' in classifier.named_steps:
|
| 403 |
+
actual_classifier = classifier.named_steps['randomforestclassifier']
|
| 404 |
+
elif 'classifier' in classifier.named_steps:
|
| 405 |
+
actual_classifier = classifier.named_steps['classifier']
|
| 406 |
+
elif 'model' in classifier.named_steps:
|
| 407 |
+
actual_classifier = classifier.named_steps['model']
|
| 408 |
+
else:
|
| 409 |
+
# Get the last step (usually the classifier)
|
| 410 |
+
actual_classifier = list(classifier.named_steps.values())[-1]
|
| 411 |
+
feature_importance = actual_classifier.feature_importances_
|
| 412 |
+
|
| 413 |
+
# Check if classifier has 'steps' attribute (another Pipeline format)
|
| 414 |
+
elif hasattr(classifier, 'steps'):
|
| 415 |
+
# Get the last step which is typically the classifier
|
| 416 |
+
actual_classifier = classifier.steps[-1][1]
|
| 417 |
+
feature_importance = actual_classifier.feature_importances_
|
| 418 |
+
|
| 419 |
+
# Direct classifier (not a pipeline)
|
| 420 |
+
elif hasattr(classifier, 'feature_importances_'):
|
| 421 |
+
feature_importance = classifier.feature_importances_
|
| 422 |
+
|
| 423 |
+
else:
|
| 424 |
+
raise AttributeError("No feature_importances_ found")
|
| 425 |
+
|
| 426 |
+
# Create feature importance plot
|
| 427 |
+
feature_names = ['age', 'gender', 'price', 'payment_method', 'shopping_mall']
|
| 428 |
+
importance_df = pd.DataFrame({
|
| 429 |
+
"Feature": feature_names,
|
| 430 |
+
"Importance": feature_importance
|
| 431 |
+
})
|
| 432 |
+
importance_df = importance_df.sort_values("Importance", ascending=True)
|
| 433 |
+
|
| 434 |
+
bar = px.bar(importance_df, x='Importance', y='Feature')
|
| 435 |
+
bar.update_layout(
|
| 436 |
+
title={
|
| 437 |
+
'text': 'Feature Importance Model Random Forest',
|
| 438 |
+
'xanchor': 'center',
|
| 439 |
+
'yanchor': 'top',
|
| 440 |
+
'x': 0.5,
|
| 441 |
+
'y': 0.95
|
| 442 |
+
}
|
| 443 |
+
)
|
| 444 |
+
st.plotly_chart(bar, use_container_width=True)
|
| 445 |
+
|
| 446 |
+
except (AttributeError, KeyError, IndexError) as e:
|
| 447 |
+
st.warning(f"โ ๏ธ Feature importance is not available for this model type.\n\nDetails: {str(e)}")
|
| 448 |
+
st.info("๐ก This usually happens when:\n- The model is a Pipeline without a RandomForest classifier\n- The model is loaded from ONNX format\n- The classifier doesn't support feature importance")
|
| 449 |
+
else:
|
| 450 |
+
st.info("๐ Feature importance is not available for ONNX models.\nPlease use Pickle model to view feature importance.")
|
| 451 |
+
|
| 452 |
+
st.write(f'<b>NOTES</b>: Hasil feature importance menunjukkan data fitur Price lebih dominan dibandingkan fitur lainnya dan evaluasi dengan Confusion Matrix terlihat sudah sangat cukup baik dalam hal identifikasi tiap kategori.', unsafe_allow_html=True)
|
| 453 |
+
|
# ------------ PREDICT NEW DATA ---------
# Collects one customer record via a form, encodes it exactly the way the
# training data was encoded, and shows the model's predicted purchase
# category together with per-class probabilities.
with st.expander("🎯 PREDICT NEW DATA"):
    with st.form("input_form", clear_on_submit=True):
        x1 = st.number_input("Age", min_value=0, max_value=100)
        x2 = st.selectbox("Gender", ["Male", "Female"])
        x3 = st.number_input("Price", min_value=0.0, max_value=10000.0, step=0.1)
        x4 = st.selectbox("Payment Method", ["Cash", "Credit Card", "Debit Card"])
        x5 = st.selectbox("Shopping Mall", ["Mall of Istanbul", "Kanyon",
                                            "Metrocity", "Metropol AVM",
                                            "Istinye Park", "Zorlu Center",
                                            "Cevahir AVM", "Forum Istanbul",
                                            "Viaport Outlet", "Emaar Square Mall"])
        submitted = st.form_submit_button(label="🔮 PREDICT")

    if submitted:
        new_data = pd.DataFrame({'age': [x1], 'gender': [x2], 'price': [x3],
                                 'payment_method': [x4], 'shopping_mall': [x5]})

        # Re-fit the encoders on the full training columns so the integer
        # codes match the ones the model was trained with (LabelEncoder sorts
        # its classes, so fitting on the same data reproduces the mapping).
        le_gender = LabelEncoder()
        le_payment_method = LabelEncoder()
        le_shopping_mall = LabelEncoder()

        # Fit with original data to ensure consistent encoding
        le_gender.fit(retail['gender'])
        le_payment_method.fit(retail['payment_method'])
        le_shopping_mall.fit(retail['shopping_mall'])

        new_data['gender'] = le_gender.transform(new_data['gender'])
        new_data['payment_method'] = le_payment_method.transform(new_data['payment_method'])
        new_data['shopping_mall'] = le_shopping_mall.transform(new_data['shopping_mall'])

        # Apply transformations: scale, then the same RFE feature selection
        # used during training (if any).
        new_data_scaled = scaler.transform(new_data)
        if rfe is not None:
            new_data_rfe = rfe.transform(new_data_scaled.reshape(1, -1))
        else:
            new_data_rfe = new_data_scaled.reshape(1, -1)

        # Make prediction based on model type
        if onnx_session is not None:
            # ONNX prediction: run the session manually with float32 input;
            # output 0 is the label, output 1 the probability tensor.
            input_name = onnx_session.get_inputs()[0].name
            label_name = onnx_session.get_outputs()[0].name
            prob_name = onnx_session.get_outputs()[1].name

            pred_result = onnx_session.run([label_name, prob_name],
                                           {input_name: new_data_rfe.astype(np.float32)})
            predict_category = pred_result[0]
            predict_proba = pred_result[1]
        else:
            # Sklearn prediction: a Pipeline does its own scaling/selection,
            # so it gets the raw encoded frame; a bare estimator gets the
            # already-transformed array.
            if hasattr(classifier, 'named_steps') or hasattr(classifier, 'steps'):
                predict_category = classifier.predict(new_data)
                predict_proba = classifier.predict_proba(new_data)
            else:
                predict_category = classifier.predict(new_data_rfe)
                predict_proba = classifier.predict_proba(new_data_rfe)

        prediction = le_model.inverse_transform(predict_category)

        st.write(f"<span style='font-size:34px; color:green;'>Predicted Category: </span> <span style='font-size:34px;'>{prediction[0]}</span>", unsafe_allow_html=True)

        # Show probability
        st.write("### Prediction Probability:")
        # FIX: derive the class names from the fitted label encoder instead of
        # a hard-coded list, so the probability bars cannot silently mismatch
        # the model's class order; the previous literal list (which matches
        # LabelEncoder's alphabetical ordering) is kept as a fallback.
        target_names = list(getattr(le_model, 'classes_',
                                    ['Books', 'Clothing', 'Cosmetics', 'Food & Beverage',
                                     'Shoes', 'Souvenir', 'Technology', 'Toys']))
        prob_df = pd.DataFrame({'Category': target_names, 'Probability': predict_proba[0]})
        prob_df = prob_df.sort_values('Probability', ascending=False)

        fig = px.bar(prob_df, x='Probability', y='Category', orientation='h',
                     title='Prediction Probability for Each Category')
        st.plotly_chart(fig, use_container_width=True)
src/static/styles.css
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Card-style shadow around Streamlit metric widgets.
   FIX: Streamlit emits the attribute `data-testid` (no hyphen between
   "test" and "id"); the original selector `[data-test-id=...]` never
   matched any element, so the rule was dead.
   FIX: dropped `padding: auto;` — `auto` is not a valid value for the
   `padding` property, so browsers ignored the declaration anyway; removing
   it does not change rendering. */
[data-testid="metric-container"] {
    box-shadow: 0 0 4px #c9d6d6;
}

/* Shadow around every Plotly chart container. */
.plot-container>div {
    box-shadow: 0 0 4px #071021;
}

/* Larger label text on expander headers. */
div[data-testid="stExpander"] div[role="button"] p {
    font-size: 1.3rem;
}

/* Larger, teal label text on dataframe widgets.
   NOTE(review): recent Streamlit versions use the test id "stDataFrame"
   (capital F) — confirm against the deployed Streamlit release. */
div[data-testid="stDataframe"] div[role="button"] p {
    font-size: 1.3rem;
    color: rgb(1, 84, 84);
}