Spaces:

trohith89
/

Electronics-Sales-Classification

Sleeping

App Files Files Community

trohith89 committed on Jan 14, 2025

Commit

5aa5d27

verified ·

1 Parent(s): e335c32

Update pages/4_Model_Creation_and_Evaluation.py

Browse files

Files changed (1) hide show

pages/4_Model_Creation_and_Evaluation.py +136 -25

pages/4_Model_Creation_and_Evaluation.py CHANGED Viewed

@@ -3,6 +3,14 @@ import pandas as pd
 import numpy as np
 from io import StringIO
 import sys
 # Page configuration
 st.set_page_config(page_title="Predictive Modelling", layout="wide")
@@ -11,14 +19,15 @@ st.set_page_config(page_title="Predictive Modelling", layout="wide")
 st.markdown(
     """
     <h1 style="text-align: center; color: white;">📱 Predictive Model Creation and Evaluation 💻</h1>
-    """,
     unsafe_allow_html=True
 )
 # Flowchart title
 st.markdown(
     """
     <h1 style="text-align: center; color: white;">Model Creation Flow</h1>
-    """,
     unsafe_allow_html=True
 )
@@ -27,9 +36,10 @@ st.markdown(
     <div style="text-align: center;">
         <img src="https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/70th8t5_UUCWKu25u6F9s.gif" alt="model-creation-flowchart.gif" width="90%" />
     </div>
-    """,
     unsafe_allow_html=True
 )
 df = st.session_state.get("dataset")
 # Exclude 'ProductID' from the dataset
@@ -39,26 +49,130 @@ if df is not None:
     st.subheader("Dataset Preview:")
     st.write(df.head())
-# Custom title styling
-st.markdown(
-    """
-    <style>
-        .title {
-            color: white;  /* White color for better visibility */
-            font-size: 36px;  /* Large font size */
-            font-weight: bold;  /* Bold text */
-            text-align: center;  /* Center alignment */
-            margin-top: 20px;
-        }
-    </style>
-    """,
-    unsafe_allow_html=True
-)
-# Custom background with overlay
 st.markdown(
     """
     <style>
@@ -66,10 +180,7 @@ st.markdown(
             background-image: url("https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/FVcAdQ1wc7rCkfdnFsZft.jpeg");
             background-size: cover;
             background-position: center;
-            height: 100vh;
         }
-        /* Semi-transparent overlay */
         .stApp::before {
             content: "";
             position: absolute;
@@ -77,10 +188,10 @@ st.markdown(
             left: 0;
             width: 100%;
             height: 100%;
-            background: rgba(0, 0, 0, 0.4);  /* 40% transparency */
             z-index: -1;
         }
     </style>
-    """,
     unsafe_allow_html=True
-)

 import numpy as np
 from io import StringIO
 import sys
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+from imblearn.over_sampling import SMOTE
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+import optuna
+from sklearn.preprocessing import PolynomialFeatures
 # Page configuration
 st.set_page_config(page_title="Predictive Modelling", layout="wide")
 st.markdown(
     """
     <h1 style="text-align: center; color: white;">📱 Predictive Model Creation and Evaluation 💻</h1>
+    """,
     unsafe_allow_html=True
 )
 # Flowchart title
 st.markdown(
     """
     <h1 style="text-align: center; color: white;">Model Creation Flow</h1>
+    """,
     unsafe_allow_html=True
 )
     <div style="text-align: center;">
         <img src="https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/70th8t5_UUCWKu25u6F9s.gif" alt="model-creation-flowchart.gif" width="90%" />
     </div>
+    """,
     unsafe_allow_html=True
 )
 df = st.session_state.get("dataset")
 # Exclude 'ProductID' from the dataset
     st.subheader("Dataset Preview:")
     st.write(df.head())
+    # Dropping unnecessary columns
+    # NOTE(review): inplace=True mutates the DataFrame object obtained from
+    # st.session_state; errors='ignore' keeps this safe on reruns where the
+    # columns are already gone.
+    df.drop(['age_bins', 'ProductPriceBucket', 'CustomerAgeGroup'], axis=1, inplace=True, errors='ignore')
+    st.write(df.head())
+    # Splitting Feature Variables and Class Labels
+    st.markdown("### Split Feature Variables and Class Labels")
+    # Copy the slice so the column assignments below write to an independent
+    # frame instead of a possible view of df (avoids SettingWithCopyWarning).
+    fv = df.iloc[:, :-1].copy()
+    cv = df.iloc[:, -1]
+    st.write(fv)
+    st.write(cv)
+    # Feature Engineering
+    st.markdown("### Feature Engineering")
+    # The same encoder is refit for each column, so only the mapping of the
+    # last fitted column remains available on label_encoder afterwards.
+    label_encoder = LabelEncoder()
+    fv['ProductBrand'] = label_encoder.fit_transform(fv['ProductBrand'])
+    fv['ProductCategory'] = label_encoder.fit_transform(fv['ProductCategory'])
+    st.write(fv.head())
+    # Polynomial Featurisation for Non-Linearity
+    st.markdown("### Polynomial Featurisation for Non-Linearity:")
+    numeric_columns = fv.select_dtypes(include=[float, int]).columns
+    degree = 2
+    poly = PolynomialFeatures(degree=degree, include_bias=False)
+    poly_features = poly.fit_transform(fv[numeric_columns])
+    poly_feature_names = poly.get_feature_names_out(numeric_columns)
+    poly_df = pd.DataFrame(poly_features, columns=poly_feature_names)
+    # poly_df has a fresh 0..n-1 index, so fv's index is reset to line the rows up.
+    fv_with_poly = pd.concat([fv.reset_index(drop=True), poly_df], axis=1)
+    # The degree-1 polynomial terms repeat the original column names; keep the first occurrence.
+    fv_with_poly = fv_with_poly.loc[:, ~fv_with_poly.columns.duplicated()]
+    st.write(fv_with_poly.head())
+    # SMOTE for Handling Imbalanced Dataset
+    # NOTE(review): resampling is applied to the full dataset before the
+    # train/test split below, so synthetic samples can end up in the test set.
+    # Consider splitting first and resampling only the training portion.
+    st.markdown("### SMOTE for Handling Imbalanced Dataset")
+    smote = SMOTE(sampling_strategy=1)
+    fv1, cv1 = smote.fit_resample(fv_with_poly, cv)
+    st.write(pd.Series(cv1).value_counts())
+    # Data Splitting
+    st.markdown("### Data Splitting")
+    x_train, x_test, y_train, y_test = train_test_split(fv1, cv1, test_size=0.2, random_state=42)
+    # Scaling
+    st.markdown("### Scaling")
+    # Fit the scaler on the training split only, then reuse it for the test split.
+    std = StandardScaler()
+    x_train_std = std.fit_transform(x_train)
+    x_test_std = std.transform(x_test)
+    st.markdown("## Hyperparameter Tuning using OPTUNA")
+    # Define the objective function for Optuna
+    # The snippet below is only displayed with st.code; it is not executed here.
+    # NOTE(review): the displayed snippet calls cross_val_score, which is not
+    # among the imports added by this commit — confirm before running it.
+    st.code("""
+    def objective(trial):
+        # Choose algorithm
+        algo = trial.suggest_categorical("algo", ["lor", "svc"])
+        if algo == "svc":
+            # SVC hyperparameters
+            c = trial.suggest_float("C", 0.001, 1000, log=True)
+            kernel = trial.suggest_categorical("kernel", ['linear', 'poly', 'rbf', 'sigmoid'])
+            if kernel == 'poly':
+                degree = trial.suggest_int("degree", 1, 3)
+                model = SVC(C=c, kernel=kernel, degree=degree, random_state=42)
+            elif kernel in ['rbf', 'sigmoid']:
+                gamma = trial.suggest_categorical("gamma", ['scale', 'auto'])
+                model = SVC(C=c, kernel=kernel, gamma=gamma, random_state=42)
+            else:
+                model = SVC(C=c, kernel=kernel, random_state=42)
+        else:
+            # Logistic Regression hyperparameters
+            solver, penalty = trial.suggest_categorical(
+                "choices", [
+                    ("lbfgs", "l2"), ("newton-cg", "l2"),
+                    ("sag", "l2"), ("saga", "l1"),
+                    ("saga", "l2"), ("saga", "elasticnet")
+                ]
+            )
+            reg_strength = trial.suggest_float("C", 0.001, 1000, log=True)
+            l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
+            if penalty == "elasticnet":
+                model = LogisticRegression(
+                    solver=solver, penalty=penalty, C=reg_strength,
+                    l1_ratio=l1_ratio, random_state=42
+                )
+            else:
+                model = LogisticRegression(
+                    solver=solver, penalty=penalty, C=reg_strength, random_state=42
+                )
+        # Perform cross-validation and return the mean score
+        score = cross_val_score(model, x_train_std, y_train, cv=5, scoring="accuracy").mean()
+        return score
+    """, language="python")
+    # Create and optimize the study
+    # This call must stay indented inside the `if` branch: at column 0 it would
+    # close the branch early and make the indented lines and `else:` below a
+    # syntax error. The snippet text itself is kept at column 0 so the displayed
+    # string is unchanged. It is only displayed, not executed.
+    st.code("""
+study = optuna.create_study(direction="maximize")
+study.optimize(objective, n_trials=100)
+# Display the best parameters
+st.write("Best Parameters:", study.best_params)
+""", language="python")
+    # Create the best model
+    # Hyperparameters are hard-coded here — presumably taken from an earlier
+    # Optuna run; TODO confirm.
+    st.markdown("## Create the Best Model")
+    model = SVC(kernel='rbf', gamma='scale', C=53.123097332514455)
+    st.write(model)
+    # Train the model
+    st.markdown("### Train the Model")
+    model.fit(x_train_std, y_train)
+    # Model Evaluation
+    st.markdown("# Model Evaluation")
+    y_pred = model.predict(x_test_std)
+    st.write("Accuracy:", accuracy_score(y_test, y_pred))
+    st.write("Classification Report:\n", classification_report(y_test, y_pred))
+    st.write("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+else:
+    st.warning("No Dataset Found")
+# Custom background styling
 st.markdown(
     """
     <style>
             background-image: url("https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/FVcAdQ1wc7rCkfdnFsZft.jpeg");
             background-size: cover;
             background-position: center;
         }
         .stApp::before {
             content: "";
             position: absolute;
             left: 0;
             width: 100%;
             height: 100%;
+            background: rgba(0, 0, 0, 0.4);
             z-index: -1;
         }
     </style>
+    """,
     unsafe_allow_html=True
+)