fendy07 commited on
Commit
899d70c
·
1 Parent(s): 0822fa4

Update project structure and add new pages

Browse files
requirements.txt CHANGED
@@ -8,7 +8,6 @@ plotly
8
  requests
9
  scikit-learn
10
  imbalanced-learn
11
- pickle
12
  joblib
13
  onnx
14
  skl2onnx
 
8
  requests
9
  scikit-learn
10
  imbalanced-learn
 
11
  joblib
12
  onnx
13
  skl2onnx
src/pages/about.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""About page: author profile, experience, and hard skills."""
import streamlit as st

# Markdown bodies kept as module-level constants so the layout code below
# stays compact.
_QUALIFICATIONS = """
    - 2 years experience coaching and mentoring about Artificial Intelligence
    - Strong hands-on experience and knowledge in Python and Data Science
    - Proficient in using various libraries and tools such as TensorFlow, Keras, Scikit-learn, OpenCV, Pandas
    - Good understanding and analyzing of statistical principles and their perspective applications
    - Excellent team player and initiative on tasks

    """

_HARD_SKILLS = """
    - Programming : Python (Pandas, Scikit-learn, Scikit-image), R, SQL, JavaScript
    - Data Visualization : Tableau, Spreadsheet, Excel
    - Modelling : Tensorflow, Keras, PyCaret, XGBoost, CometML
    - Databases : MySQL, PostgreSQL, SQLite
    - Deployment : Streamlit, Flask, Gradio, Huggingface, Git
    - Frameworks : OpenCV, NLTK
    
    """

st.title('Author Project')

# Photo on the left, short bio on the right, vertically centered.
photo_col, profile_col = st.columns(2, gap='small', vertical_alignment='center')
with photo_col:
    st.image("images/Fendy.png", width=250)
with profile_col:
    st.title("Fendy Hendriyanto", anchor=False)
    st.write("AI Engineer and Instructor")
    st.write("Assisting and mentoring students to help analyze and supporting data driven with creativity and decision making.")

# --- Experience & qualifications ---
st.write("\n")
st.subheader("Experience and Qualifications", anchor=False)
st.write(_QUALIFICATIONS)

# --- Hard skills ---
st.write("\n")
st.subheader("Hard Skills", anchor=False)
st.write(_HARD_SKILLS)
src/pages/dashboard.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import pandas as pd
import seaborn as sns
import streamlit as st
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

st.title("Dashboard Analysis Customer Retail")

# Load CSS style
with open('static/styles.css') as f:
    st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

# Load dataset (cached so the CSV is read once per session, not on every rerun)
@st.cache_data
def load_data():
    """Read the customer shopping CSV and return it as a DataFrame."""
    data = pd.read_csv('data/customer_shopping_data.csv')
    return data
data = load_data()

with st.expander("HASIL DATA"):
    # Rename the raw snake_case columns to presentation-friendly names.
    data = pd.DataFrame({
        'InvoiceNo': data['invoice_no'],
        'CustomerID': data['customer_id'],
        'Gender': data['gender'],
        'Age': data['age'],
        'Category': data['category'],
        'Quantity': data['quantity'],
        'Price': data['price'],
        'PaymentMethod': data['payment_method'],
        'InvoiceDate': data['invoice_date'],
        'ShoppingMall': data['shopping_mall']
    })
    st.dataframe(data, use_container_width=True)

    # Download Dataset
    download = data.to_csv(index=False).encode('utf-8')
    st.download_button(label = "DOWNLOAD DATASET",
                       data = download,
                       key = 'download_data.csv',
                       file_name = 'dataset_retail.csv')

# Visualization
with st.expander("DISTRIBUSI KATEGORI DAN PEMBAYARAN"):
    col1, col2 = st.columns(2)
    with col1:
        data_quantity = data.groupby('Category')['Quantity'].sum()
        # Plot Pie Chart
        plt.figure(figsize = (10, 8))
        plt.pie(data_quantity.values, labels = data_quantity.index,
                autopct = '%1.1f%%', colors = sns.color_palette("pastel"))
        # Title
        plt.title('Kuantitas Produk Berdasarkan Kategori', fontsize = 16)
        st.pyplot(plt)
        # Fix: close the global pyplot figure so figures do not accumulate
        # across Streamlit reruns (memory leak).
        plt.close()

    with col2:
        payment_counts = data['PaymentMethod'].value_counts()
        fig = px.bar(x = payment_counts.index, y = payment_counts.values,
                     labels = {'x': 'Metode Pembayaran', 'y': 'Jumlah Transaksi'},
                     color = payment_counts.index)
        # Fix: merged the two update_layout calls and dropped the unused
        # `title = ...` assignment (update_layout mutates fig in place).
        fig.update_layout(font_size = 14,
                          title = {'text': 'Distribusi Metode Pembayaran',
                                   'xanchor': 'center',
                                   'yanchor': 'top',
                                   'x': 0.5,
                                   'y': 0.95})

        st.plotly_chart(fig, use_container_width=True)

    # Fix: dropped the pointless f-prefix (the string has no placeholders).
    st.write("<b>NOTES</b>: Distribusi dalam kategori berdasarkan kuantitas kategori produk yang sering dibeli oleh pelanggan adalah baju, kosmetik dan F&B. Sedangkan, metode pembayaran dengan transaksi terbanyak adalah Cash dan Credit.", unsafe_allow_html=True)

with st.expander("TOTAL PENDAPATAN DAN PENJUALAN"):
    col1, col2 = st.columns(2)
    with col1:
        total_revenue = data.groupby('ShoppingMall')['Price'].sum()
        fig = px.bar(x = total_revenue.index, y = total_revenue.values,
                     labels = {'x': 'Mall', 'y': 'Total Pendapatan'},
                     color = total_revenue.index)
        # Fix: dropped the unused `title = ...` assignment.
        fig.update_layout(title = {'text': 'Total Pendapatan Setiap Pusat Perbelanjaan',
                                   'xanchor': 'center',
                                   'yanchor': 'top',
                                   'x': 0.5,
                                   'y': 0.95})

        st.plotly_chart(fig, use_container_width=True)

    with col2:
        total_sales = data.groupby('ShoppingMall')['Quantity'].sum().sort_values(ascending=False)
        fig = px.bar(x = total_sales.index, y = total_sales.values,
                     labels = {'x': 'Mall', 'y': 'Total Penjualan'},
                     color = total_sales.index)
        fig.update_layout(title = {'text': 'Total Penjualan Setiap Pusat Perbelanjaan',
                                   'xanchor': 'center',
                                   'yanchor': 'top',
                                   'x': 0.5,
                                   'y': 0.95})

        st.plotly_chart(fig, use_container_width=True)
    st.write("<b>NOTES</b>: Pusat perbelanjaan dengan total pendapatan dan penjualan tertinggi adalah Mall of Istanbul, diikuti oleh Mall Kanyon dan Mall Metrocity.", unsafe_allow_html=True)
src/pages/predict.py ADDED
@@ -0,0 +1,525 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
import onnxruntime as ort
import plotly.express as px
from scipy.stats import zscore
import matplotlib.pyplot as plt
from skl2onnx import convert_sklearn
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from skl2onnx.common.data_types import FloatTensorType
from streamlit_extras.metric_cards import style_metric_cards
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

st.title("Customer Category Prediction (Case: Turkey Customer)")
st.write("Prediction Customer in Turkey with Probability Using Ensemble Technique Based")

# Load CSS style
with open('static/styles.css') as f:
    st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

# Load Dataset
retail = pd.read_csv('data/customer_shopping_data.csv')

# Feature matrix and (single-column) target frame
X = retail.loc[:, ['age', 'gender', 'price', 'payment_method', 'shopping_mall']]
y = retail[['category']]

# Encode categorical variables.
# NOTE: one LabelEncoder instance is re-fitted per column, overwriting its
# mapping each time. After this section `le` holds the *category* mapping,
# which later code relies on to inverse-transform predictions — do not
# reorder these calls.
le = LabelEncoder()
X['gender'] = le.fit_transform(X['gender'])
X['payment_method'] = le.fit_transform(X['payment_method'])
X['shopping_mall'] = le.fit_transform(X['shopping_mall'])
# Fix: pass the 1-D Series instead of the (n, 1) DataFrame — LabelEncoder
# expects 1-D input and warns (or raises, depending on the sklearn
# version) on 2-D input. The resulting integer codes are identical.
y_encoded = le.fit_transform(y['category'])

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=44)

# Preprocessing: standardize features (fit on the train split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Outlier detection using Z-Score
z_scores = np.abs(zscore(X_train_scaled))
threshold = 5  # keep rows whose every feature is within 5 std devs
outliers = np.where(z_scores > threshold)

# Drop training rows containing any outlier feature (test set untouched).
X_train_clean = X_train_scaled[(z_scores < threshold).all(axis=1)]
y_train_clean = y_train[(z_scores < threshold).all(axis=1)]
#------------ MODEL TRAINING SECTION ---------
# Renders the training/model-management expander. Streamlit `with` blocks
# affect layout only, not Python scope: every widget value bound below
# (n_estimators, test_size, random_state, n_features, train_button,
# model_format, load_option) plus the model_loaded/use_onnx flags are
# module-level names consumed by the sections further down the script.
with st.expander("🔄 MODEL TRAINING & MANAGEMENT"):
    st.subheader("Train or Load Model")

    col1, col2 = st.columns(2)

    with col1:
        # Hyper-parameters for training a fresh RandomForest + RFE model.
        st.write("### Training Parameters")
        n_estimators = st.slider("Number of Trees (n_estimators)",
                                 min_value=50, max_value=500, value=300, step=50)
        test_size = st.slider("Test Size",
                              min_value=0.1, max_value=0.4, value=0.2, step=0.05)
        random_state = st.number_input("Random State",
                                       min_value=0, max_value=100, value=44)
        n_features = st.slider("Number of Features to Select (RFE)",
                               min_value=1, max_value=5, value=5)

        train_button = st.button("🚀 TRAIN NEW MODEL", type="primary")

    with col2:
        # Choose which serialized model (if any) to load from disk.
        st.write("### Model Management")
        model_format = st.radio("Choose Model Format:",
                                ["ONNX Model (.onnx)", "Pickle Model (.pkl)"])

        load_option = st.radio("Choose Model Source:",
                               ["Load Existing Model", "Use Newly Trained Model"])

        if load_option == "Load Existing Model":
            if model_format == "ONNX Model (.onnx)":
                # ONNX inference needs both the graph and the pickled
                # preprocessing metadata (scaler/RFE/label encoder).
                model_path = 'model/best_model_rf.onnx'
                metadata_path = 'model/model_metadata.pkl'
                if os.path.exists(model_path) and os.path.exists(metadata_path):
                    st.success("✅ ONNX model found!")
                    model_loaded = True
                    use_onnx = True
                else:
                    st.error("❌ ONNX model not found. Please train a new model first.")
                    model_loaded = False
                    use_onnx = False
            else:
                model_path = 'model/best_model_rf.pkl'
                if os.path.exists(model_path):
                    st.success("✅ Pickle model found!")
                    model_loaded = True
                    use_onnx = False
                else:
                    st.error("❌ Pickle model not found. Please train a new model first.")
                    model_loaded = False
                    use_onnx = False
        else:
            # "Use Newly Trained Model": nothing is loaded from disk; the
            # session-state model (if any) is picked up further down.
            model_loaded = False
            use_onnx = False

# Initialize session state for model
# Session state survives Streamlit reruns, so a model trained once stays
# available until the browser session ends.
if 'trained_model' not in st.session_state:
    st.session_state.trained_model = None
    st.session_state.trained_rfe = None
    st.session_state.trained_scaler = None
    st.session_state.trained_le = None
    st.session_state.model_metrics = None
    st.session_state.onnx_session = None
# Train new model
# Runs only on the rerun triggered by the TRAIN button: re-splits with the
# chosen parameters, cleans outliers, fits RandomForest + RFE, stores the
# artifacts in session state, and persists them as Pickle and (best-effort)
# ONNX files.
if train_button:
    with st.spinner("Training model... Please wait..."):
        # Re-split data with new test_size
        X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(
            X, y_encoded, test_size=test_size, random_state=random_state
        )

        # Preprocessing
        scaler_new = StandardScaler()
        X_train_scaled_new = scaler_new.fit_transform(X_train_new)
        X_test_scaled_new = scaler_new.transform(X_test_new)

        # Outlier removal (reuses the module-level z-score `threshold`)
        z_scores_new = np.abs(zscore(X_train_scaled_new))
        X_train_clean_new = X_train_scaled_new[(z_scores_new < threshold).all(axis=1)]
        y_train_clean_new = y_train_new[(z_scores_new < threshold).all(axis=1)]

        # Model training with RFE
        classifier_new = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
        rfe_new = RFE(classifier_new, n_features_to_select=n_features)
        X_train_rfe = rfe_new.fit_transform(X_train_clean_new, y_train_clean_new)
        X_test_rfe = rfe_new.transform(X_test_scaled_new)

        # Fit the model on the RFE-reduced feature set used for prediction
        classifier_new.fit(X_train_rfe, y_train_clean_new)

        # Predictions
        y_pred_new = classifier_new.predict(X_test_rfe)

        # Calculate metrics (weighted averages across the classes)
        metrics = {
            'accuracy': accuracy_score(y_test_new, y_pred_new),
            'precision': precision_score(y_test_new, y_pred_new, average='weighted'),
            'recall': recall_score(y_test_new, y_pred_new, average='weighted'),
            'f1_score': f1_score(y_test_new, y_pred_new, average='weighted')
        }

        # Save to session state so the model survives Streamlit reruns
        st.session_state.trained_model = classifier_new
        st.session_state.trained_rfe = rfe_new
        st.session_state.trained_scaler = scaler_new
        st.session_state.trained_le = le
        st.session_state.model_metrics = metrics
        st.session_state.X_test = X_test_rfe
        st.session_state.y_test = y_test_new
        st.session_state.y_pred = y_pred_new

        # Save as Pickle: one bundle with the model plus every
        # preprocessing artifact needed to reproduce predictions
        model_package = {
            'classifier': classifier_new,
            'rfe': rfe_new,
            'scaler': scaler_new,
            'label_encoder': le,
            'metrics': metrics,
            'n_features': n_features
        }

        with open('model/best_model_rf.pkl', 'wb') as f:
            pickle.dump(model_package, f)

        # Convert and Save as ONNX
        try:
            # Define initial type for ONNX conversion
            initial_type = [('float_input', FloatTensorType([None, n_features]))]

            # Convert model to ONNX
            onnx_model = convert_sklearn(classifier_new, initial_types=initial_type,
                                         target_opset=12)

            # Save ONNX model
            with open('model/best_model_rf.onnx', 'wb') as f:
                f.write(onnx_model.SerializeToString())

            # Save metadata (scaler, rfe, label_encoder) separately — the
            # ONNX graph contains only the classifier itself
            metadata = {
                'scaler': scaler_new,
                'rfe': rfe_new,
                'label_encoder': le,
                'metrics': metrics,
                'n_features': n_features,
                'feature_names': ['age', 'gender', 'price', 'payment_method', 'shopping_mall']
            }

            with open('model/model_metadata.pkl', 'wb') as f:
                pickle.dump(metadata, f)

            st.success(f"✅ Model trained and saved successfully!")
            st.success(f"📊 Accuracy: {metrics['accuracy']:.4f}")
            st.success(f"💾 Saved as: Pickle (.pkl) and ONNX (.onnx)")

        except Exception as e:
            # Best-effort: keep the Pickle artifact even if ONNX export fails
            st.warning(f"⚠️ Model saved as Pickle only. ONNX conversion failed: {str(e)}")

        st.balloons()
# Determine which model to use
# Priority: (1) model trained in this session, (2) ONNX file, (3) Pickle
# file, (4) train a default model on the fly. Every branch must leave the
# same module-level names defined for the sections below: classifier, rfe,
# le_model, X_test_final, y_test_final, y_pred_final,
# accuracy/precision/recall/f1, and onnx_session.
if load_option == "Use Newly Trained Model" and st.session_state.trained_model is not None:
    classifier = st.session_state.trained_model
    rfe = st.session_state.trained_rfe
    scaler = st.session_state.trained_scaler
    le_model = st.session_state.trained_le
    X_test_final = st.session_state.X_test
    y_test_final = st.session_state.y_test
    y_pred_final = st.session_state.y_pred

    accuracy = st.session_state.model_metrics['accuracy']
    precision = st.session_state.model_metrics['precision']
    recall = st.session_state.model_metrics['recall']
    f1 = st.session_state.model_metrics['f1_score']

    onnx_session = None
    st.info("🔵 Using newly trained model from this session")

elif model_loaded and use_onnx:
    # Load ONNX Model
    try:
        onnx_session = ort.InferenceSession('model/best_model_rf.onnx')

        # Load metadata (preprocessing artifacts are not part of the ONNX
        # graph, so they travel in a separate pickle)
        with open('model/model_metadata.pkl', 'rb') as f:
            metadata = pickle.load(f)

        scaler = metadata['scaler']
        rfe = metadata['rfe']
        le_model = metadata['label_encoder']
        metrics = metadata.get('metrics', {})

        # Apply transformations
        # NOTE(review): fit_transform re-fits the loaded RFE on the current
        # split instead of reusing its saved fit — confirm this is intended.
        X_train_rfe = rfe.fit_transform(X_train_clean, y_train_clean)
        X_test_final = rfe.transform(X_test_scaled)

        # Predict using ONNX (output 0 is taken as the predicted labels)
        input_name = onnx_session.get_inputs()[0].name
        label_name = onnx_session.get_outputs()[0].name

        y_pred_final = onnx_session.run([label_name], {input_name: X_test_final.astype(np.float32)})[0]
        y_test_final = y_test

        # Calculate metrics: prefer the stored training metrics, fall back
        # to recomputing from the current predictions
        accuracy = metrics.get('accuracy', accuracy_score(y_test_final, y_pred_final))
        precision = metrics.get('precision', precision_score(y_test_final, y_pred_final, average='weighted'))
        recall = metrics.get('recall', recall_score(y_test_final, y_pred_final, average='weighted'))
        f1 = metrics.get('f1_score', f1_score(y_test_final, y_pred_final, average='weighted'))

        classifier = None  # ONNX doesn't need sklearn classifier

        st.info("🟢 Using ONNX model from file")

    except Exception as e:
        # NOTE(review): this fallback only flips the flags; since the
        # if/elif chain has already been entered, no other branch runs on
        # this pass, leaving accuracy/X_test_final etc. undefined until the
        # next rerun — the sections below will then raise NameError.
        st.error(f"Failed to load ONNX model: {str(e)}")
        st.warning("Falling back to default model...")
        model_loaded = False
        use_onnx = False
        onnx_session = None

elif model_loaded and not use_onnx:
    # Load Pickle Model
    with open('model/best_model_rf.pkl', 'rb') as f:
        model_data = pickle.load(f)

    if isinstance(model_data, dict):
        # New-style bundle: classifier plus preprocessing artifacts
        classifier = model_data['classifier']
        rfe = model_data.get('rfe', None)
        scaler = model_data.get('scaler', scaler)
        le_model = model_data.get('label_encoder', le)

        if rfe is None:
            rfe = RFE(classifier, n_features_to_select=5)

        # Apply transformations (re-fit on the current split)
        X_train_rfe = rfe.fit_transform(X_train_clean, y_train_clean)
        X_test_final = rfe.transform(X_test_scaled)
        classifier.fit(X_train_rfe, y_train_clean)
        y_pred_final = classifier.predict(X_test_final)
        y_test_final = y_test

        # Calculate metrics
        accuracy = accuracy_score(y_test_final, y_pred_final)
        precision = precision_score(y_test_final, y_pred_final, average='weighted')
        recall = recall_score(y_test_final, y_pred_final, average='weighted')
        f1 = f1_score(y_test_final, y_pred_final, average='weighted')
    else:
        # Legacy pickle: a bare estimator or a sklearn Pipeline
        classifier = model_data
        le_model = le

        if hasattr(classifier, 'named_steps') or hasattr(classifier, 'steps'):
            # Pipeline handles its own preprocessing — feed raw features
            y_pred_final = classifier.predict(X_test)
            y_test_final = y_test
            X_test_final = X_test_scaled
            rfe = None
        else:
            rfe = RFE(classifier, n_features_to_select=5)
            X_train_rfe = rfe.fit_transform(X_train_clean, y_train_clean)
            X_test_final = rfe.transform(X_test_scaled)
            classifier.fit(X_train_rfe, y_train_clean)
            y_pred_final = classifier.predict(X_test_final)
            y_test_final = y_test

        accuracy = accuracy_score(y_test_final, y_pred_final)
        precision = precision_score(y_test_final, y_pred_final, average='weighted')
        recall = recall_score(y_test_final, y_pred_final, average='weighted')
        f1 = f1_score(y_test_final, y_pred_final, average='weighted')

    onnx_session = None
    st.info("🟢 Using Pickle model from file")

else:
    # Default: train on the fly with fixed hyper-parameters
    classifier = RandomForestClassifier(n_estimators=300, random_state=44)
    rfe = RFE(classifier, n_features_to_select=5)
    X_train_rfe = rfe.fit_transform(X_train_clean, y_train_clean)
    X_test_final = rfe.transform(X_test_scaled)
    classifier.fit(X_train_rfe, y_train_clean)
    y_pred_final = classifier.predict(X_test_final)
    y_test_final = y_test
    le_model = le

    accuracy = accuracy_score(y_test_final, y_pred_final)
    precision = precision_score(y_test_final, y_pred_final, average='weighted')
    recall = recall_score(y_test_final, y_pred_final, average='weighted')
    f1 = f1_score(y_test_final, y_pred_final, average='weighted')

    onnx_session = None
    st.warning("⚠️ Using default model (trained on-the-fly)")
# Evaluation Metrics
with st.expander("📊 EVALUATION METRICS"):
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("ACCURACY", value=f'{accuracy:.4f}', delta='Accuracy Score')
    col2.metric("PRECISION", value=f'{precision:.4f}', delta='Precision Score With Weighted Average')
    col3.metric("RECALL", value=f'{recall:.4f}', delta='Recall Score With Weighted Average')
    col4.metric("F1 SCORE", value=f'{f1:.4f}', delta='F1 Score with Weighted Average')
    style_metric_cards(background_color='#FFFFFF', border_left_color='#9900AD', border_color='#1F66BD', box_shadow='#F71938')
    # Fix: dropped the pointless f-prefix (the string has no placeholders).
    st.write("<b>NOTES</b>: Hasil evaluasi metriks yang diterapkan sangat baik dan sudah sesuai dengan hasil pelatihan model algoritma Random Forest.", unsafe_allow_html=True)

# Prediction Table
with st.expander("📋 PREDICT TABLE".replace("PREDICT TABLE", "PREDICTION TABLE")):
    # Fix: the original indexed columns 0..4 unconditionally, which raises
    # IndexError whenever RFE selected fewer than 5 features. Build the
    # columns from what RFE actually kept instead (identical output for
    # the default n_features=5, where rfe.support_ is all-True).
    all_features = ['age', 'gender', 'price', 'payment_method', 'shopping_mall']
    if rfe is not None and hasattr(rfe, 'support_'):
        kept_features = [name for name, kept in zip(all_features, rfe.support_) if kept]
    else:
        kept_features = all_features
    table_columns = {name: X_test_final[:, i].ravel()
                     for i, name in enumerate(kept_features)}
    table_columns['Category | Actual Y'] = y_test_final.ravel()
    table_columns['Y_Predicted'] = y_pred_final.ravel()
    # Overall metrics repeated per row so they survive the CSV export.
    table_columns['Accuracy'] = [accuracy] * len(y_test_final)
    table_columns['Precision'] = [precision] * len(y_test_final)
    table_columns['Recall'] = [recall] * len(y_test_final)
    table_columns['F1 Score'] = [f1] * len(y_test_final)
    prediction_table = pd.DataFrame(table_columns)

    st.dataframe(prediction_table, use_container_width=True)
    # Fix: dropped the pointless f-prefix here as well.
    st.write('<b>NOTES</b>: Pada bagian tabel prediksi ini menggunakan data yang telah diolah sebelumnya sehingga sangat berbeda dengan data asli.', unsafe_allow_html=True)

    # Download Predicted Table in CSV
    df_predict = prediction_table.to_csv(index=False).encode('utf-8')
    st.download_button(label="📥 DOWNLOAD PREDICTED DATA",
                       data=df_predict,
                       key="download_predict.csv",
                       file_name='data_predict.csv')
# Confusion Matrix and Feature Importance
with st.expander("🔍 CONFUSION MATRIX & FEATURE IMPORTANCE"):
    col1, col2 = st.columns(2)
    with col1:
        # Hard-coded class names — assumes the dataset always contains
        # exactly these 8 categories in alphabetical order (matching
        # LabelEncoder's sorted encoding). TODO confirm against the data.
        target_names = ['Books', 'Clothing', 'Cosmetics', 'Food & Beverage',
                        'Shoes', 'Souvenir', 'Technology', 'Toys']
        cm = confusion_matrix(y_test_final, y_pred_final)
        plt.figure(figsize=(15, 8))
        sns.heatmap(cm, annot=True, cmap='Blues', fmt='d', xticklabels=target_names, yticklabels=target_names)
        plt.title('Confusion Matrix Customer Category Prediction')
        plt.xlabel('Predicted labels')
        plt.ylabel('True labels')
        # NOTE(review): the global pyplot figure is never closed, so
        # figures accumulate across Streamlit reruns.
        st.pyplot(fig=plt, use_container_width=True)

    # (Translated) Replace the Feature Importance section (around lines
    # 390-410) with this code:
    with col2:
        # Feature Importance only available for sklearn models, not ONNX
        if classifier is not None:
            try:
                # Check if classifier is a Pipeline
                if hasattr(classifier, 'named_steps'):
                    # Try common pipeline step names
                    if 'randomforestclassifier' in classifier.named_steps:
                        actual_classifier = classifier.named_steps['randomforestclassifier']
                    elif 'classifier' in classifier.named_steps:
                        actual_classifier = classifier.named_steps['classifier']
                    elif 'model' in classifier.named_steps:
                        actual_classifier = classifier.named_steps['model']
                    else:
                        # Get the last step (usually the classifier)
                        actual_classifier = list(classifier.named_steps.values())[-1]
                    feature_importance = actual_classifier.feature_importances_

                # Check if classifier has 'steps' attribute (another Pipeline format)
                elif hasattr(classifier, 'steps'):
                    # Get the last step which is typically the classifier
                    actual_classifier = classifier.steps[-1][1]
                    feature_importance = actual_classifier.feature_importances_

                # Direct classifier (not a pipeline)
                elif hasattr(classifier, 'feature_importances_'):
                    feature_importance = classifier.feature_importances_

                else:
                    raise AttributeError("No feature_importances_ found")

                # Create feature importance plot
                # NOTE(review): labels assume all 5 original features; if
                # RFE selected fewer, names and importances can misalign —
                # confirm n_features == 5 when reading this chart.
                feature_names = ['age', 'gender', 'price', 'payment_method', 'shopping_mall']
                importance_df = pd.DataFrame({
                    "Feature": feature_names,
                    "Importance": feature_importance
                })
                importance_df = importance_df.sort_values("Importance", ascending=True)

                bar = px.bar(importance_df, x='Importance', y='Feature')
                bar.update_layout(
                    title={
                        'text': 'Feature Importance Model Random Forest',
                        'xanchor': 'center',
                        'yanchor': 'top',
                        'x': 0.5,
                        'y': 0.95
                    }
                )
                st.plotly_chart(bar, use_container_width=True)

            except (AttributeError, KeyError, IndexError) as e:
                st.warning(f"⚠️ Feature importance is not available for this model type.\n\nDetails: {str(e)}")
                st.info("💡 This usually happens when:\n- The model is a Pipeline without a RandomForest classifier\n- The model is loaded from ONNX format\n- The classifier doesn't support feature importance")
        else:
            st.info("📊 Feature importance is not available for ONNX models.\nPlease use Pickle model to view feature importance.")

    st.write(f'<b>NOTES</b>: Hasil feature importance menunjukkan data fitur Price lebih dominan dibandingkan fitur lainnya dan evaluasi dengan Confusion Matrix terlihat sudah sangat cukup baik dalam hal identifikasi tiap kategori.', unsafe_allow_html=True)
#------------ PREDICT NEW DATA ---------
# Single-record prediction form. Uses the module-level `scaler`, `rfe`,
# `classifier` / `onnx_session` and `le_model` chosen by the selection
# logic above.
with st.expander("🎯 PREDICT NEW DATA"):
    with st.form("input_form", clear_on_submit=True):
        x1 = st.number_input("Age", min_value=0, max_value=100)
        x2 = st.selectbox("Gender", ["Male", "Female"])
        x3 = st.number_input("Price", min_value=0.0, max_value=10000.0, step=0.1)
        x4 = st.selectbox("Payment Method", ["Cash", "Credit Card", "Debit Card"])
        x5 = st.selectbox("Shopping Mall", ["Mall of Istanbul", "Kanyon",
                                            "Metrocity", "Metropol AVM",
                                            "Istinye Park", "Zorlu Center",
                                            "Cevahir AVM", "Forum Istanbul",
                                            "Viaport Outlet", "Emaar Square Mall"])
        submitted = st.form_submit_button(label="🔮 PREDICT")

    if submitted:
        # One-row frame matching the training feature order.
        new_data = pd.DataFrame({'age': [x1], 'gender': [x2], 'price': [x3],
                                 'payment_method': [x4], 'shopping_mall': [x5]})

        # Fresh encoders per column (unlike training, which reused one).
        le_gender = LabelEncoder()
        le_payment_method = LabelEncoder()
        le_shopping_mall = LabelEncoder()

        # Fit with original data to ensure consistent encoding
        le_gender.fit(retail['gender'])
        le_payment_method.fit(retail['payment_method'])
        le_shopping_mall.fit(retail['shopping_mall'])

        new_data['gender'] = le_gender.transform(new_data['gender'])
        new_data['payment_method'] = le_payment_method.transform(new_data['payment_method'])
        new_data['shopping_mall'] = le_shopping_mall.transform(new_data['shopping_mall'])

        # Apply transformations
        new_data_scaled = scaler.transform(new_data)
        if rfe is not None:
            # reshape(1, -1) is redundant for a single-row 2-D array, but harmless
            new_data_rfe = rfe.transform(new_data_scaled.reshape(1, -1))
        else:
            new_data_rfe = new_data_scaled.reshape(1, -1)

        # Make prediction based on model type
        if onnx_session is not None:
            # ONNX Prediction — assumes output 0 = labels and output 1 =
            # class probabilities (skl2onnx classifier convention);
            # TODO confirm for the exported graph.
            input_name = onnx_session.get_inputs()[0].name
            label_name = onnx_session.get_outputs()[0].name
            prob_name = onnx_session.get_outputs()[1].name

            pred_result = onnx_session.run([label_name, prob_name],
                                           {input_name: new_data_rfe.astype(np.float32)})
            predict_category = pred_result[0]
            predict_proba = pred_result[1]
        else:
            # Sklearn Prediction
            if hasattr(classifier, 'named_steps') or hasattr(classifier, 'steps'):
                # NOTE(review): the Pipeline path receives the encoded but
                # unscaled frame — assumes the pipeline scales internally;
                # confirm against the saved pipeline.
                predict_category = classifier.predict(new_data)
                predict_proba = classifier.predict_proba(new_data)
            else:
                predict_category = classifier.predict(new_data_rfe)
                predict_proba = classifier.predict_proba(new_data_rfe)

        # Map the numeric class id back to its category name.
        prediction = le_model.inverse_transform(predict_category)

        st.write(f"<span style='font-size:34px; color:green;'>Predicted Category: </span> <span style='font-size:34px;'>{prediction[0]}</span>", unsafe_allow_html=True)

        # Show probability
        st.write("### Prediction Probability:")
        # Assumes 8 classes in LabelEncoder's sorted order — TODO confirm.
        target_names = ['Books', 'Clothing', 'Cosmetics', 'Food & Beverage',
                        'Shoes', 'Souvenir', 'Technology', 'Toys']
        prob_df = pd.DataFrame({'Category': target_names, 'Probability': predict_proba[0]})
        prob_df = prob_df.sort_values('Probability', ascending=False)

        fig = px.bar(prob_df, x='Probability', y='Category', orientation='h',
                     title='Prediction Probability for Each Category')
        st.plotly_chart(fig, use_container_width=True)
src/static/styles.css ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/* Metric cards (rendered by streamlit-extras).
   Fixed: the selector used `data-test-id` while every other rule in this
   file uses the Streamlit attribute `data-testid`; also dropped
   `padding: auto`, which is not a valid CSS padding value and was being
   ignored by the browser. */
[data-testid="metric-container"] {
    box-shadow: 0 0 4px #c9d6d6;
}

/* Plotly / chart containers */
.plot-container > div {
    box-shadow: 0 0 4px #071021;
}

/* Enlarge expander header text */
div[data-testid="stExpander"] div[role="button"] p {
    font-size: 1.3rem;
}

/* Dataframe header text.
   NOTE(review): verify this test id — some Streamlit versions emit
   "stDataFrame" (capital F), in which case this rule never matches. */
div[data-testid="stDataframe"] div[role="button"] p {
    font-size: 1.3rem;
    color: rgb(1, 84, 84);
}