trohith89 committed on
Commit
5aa5d27
·
verified ·
1 Parent(s): e335c32

Update pages/4_Model_Creation_and_Evaluation.py

Browse files
pages/4_Model_Creation_and_Evaluation.py CHANGED
@@ -3,6 +3,14 @@ import pandas as pd
3
  import numpy as np
4
  from io import StringIO
5
  import sys
 
 
 
 
 
 
 
 
6
 
7
  # Page configuration
8
  st.set_page_config(page_title="Predictive Modelling", layout="wide")
@@ -11,14 +19,15 @@ st.set_page_config(page_title="Predictive Modelling", layout="wide")
11
  st.markdown(
12
  """
13
  <h1 style="text-align: center; color: white;">📱 Predictive Model Creation and Evaluation 💻</h1>
14
- """,
15
  unsafe_allow_html=True
16
  )
 
17
  # Flowchart title
18
  st.markdown(
19
  """
20
  <h1 style="text-align: center; color: white;">Model Creation Flow</h1>
21
- """,
22
  unsafe_allow_html=True
23
  )
24
 
@@ -27,9 +36,10 @@ st.markdown(
27
  <div style="text-align: center;">
28
  <img src="https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/70th8t5_UUCWKu25u6F9s.gif" alt="model-creation-flowchart.gif" width="90%" />
29
  </div>
30
- """,
31
  unsafe_allow_html=True
32
  )
 
33
  df = st.session_state.get("dataset")
34
 
35
  # Exclude 'ProductID' from the dataset
@@ -39,26 +49,130 @@ if df is not None:
39
  st.subheader("Dataset Preview:")
40
  st.write(df.head())
41
 
 
 
 
42
 
43
- # Custom title styling
44
- st.markdown(
45
- """
46
- <style>
47
- .title {
48
- color: white; /* White color for better visibility */
49
- font-size: 36px; /* Large font size */
50
- font-weight: bold; /* Bold text */
51
- text-align: center; /* Center alignment */
52
- margin-top: 20px;
53
- }
54
- </style>
55
- """,
56
- unsafe_allow_html=True
57
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
 
 
 
59
 
 
 
 
 
 
60
 
61
- # Custom background with overlay
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  st.markdown(
63
  """
64
  <style>
@@ -66,10 +180,7 @@ st.markdown(
66
  background-image: url("https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/FVcAdQ1wc7rCkfdnFsZft.jpeg");
67
  background-size: cover;
68
  background-position: center;
69
- height: 100vh;
70
  }
71
-
72
- /* Semi-transparent overlay */
73
  .stApp::before {
74
  content: "";
75
  position: absolute;
@@ -77,10 +188,10 @@ st.markdown(
77
  left: 0;
78
  width: 100%;
79
  height: 100%;
80
- background: rgba(0, 0, 0, 0.4); /* 40% transparency */
81
  z-index: -1;
82
  }
83
  </style>
84
- """,
85
  unsafe_allow_html=True
86
- )
 
3
  import numpy as np
4
  from io import StringIO
5
  import sys
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
8
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
9
+ from imblearn.over_sampling import SMOTE
10
+ from sklearn.linear_model import LogisticRegression
11
+ from sklearn.svm import SVC
12
+ import optuna
13
+ from sklearn.preprocessing import PolynomialFeatures
14
 
15
  # Page configuration
16
  st.set_page_config(page_title="Predictive Modelling", layout="wide")
 
19
  st.markdown(
20
  """
21
  <h1 style="text-align: center; color: white;">📱 Predictive Model Creation and Evaluation 💻</h1>
22
+ """,
23
  unsafe_allow_html=True
24
  )
25
+
26
  # Flowchart title
27
  st.markdown(
28
  """
29
  <h1 style="text-align: center; color: white;">Model Creation Flow</h1>
30
+ """,
31
  unsafe_allow_html=True
32
  )
33
 
 
36
  <div style="text-align: center;">
37
  <img src="https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/70th8t5_UUCWKu25u6F9s.gif" alt="model-creation-flowchart.gif" width="90%" />
38
  </div>
39
+ """,
40
  unsafe_allow_html=True
41
  )
42
+
43
  df = st.session_state.get("dataset")
44
 
45
  # Exclude 'ProductID' from the dataset
 
49
  st.subheader("Dataset Preview:")
50
  st.write(df.head())
51
 
52
+ # Dropping unnecessary columns
53
+ df.drop(['age_bins', 'ProductPriceBucket', 'CustomerAgeGroup'], axis=1, inplace=True, errors='ignore')
54
+ st.write(df.head())
55
 
56
+ # Splitting Feature Variables and Class Labels
57
+ st.markdown("### Split Feature Variables and Class Labels")
58
+ fv = df.iloc[:, :-1]
59
+ cv = df.iloc[:, -1]
60
+ st.write(fv)
61
+ st.write(cv)
62
+
63
+ # Feature Engineering
64
+ st.markdown("### Feature Engineering")
65
+ label_encoder = LabelEncoder()
66
+ fv['ProductBrand'] = label_encoder.fit_transform(fv['ProductBrand'])
67
+ fv['ProductCategory'] = label_encoder.fit_transform(fv['ProductCategory'])
68
+ st.write(fv.head())
69
+
70
+ # Polynomial Featurisation for Non-Linearity
71
+ st.markdown("### Polynomial Featurisation for Non-Linearity:")
72
+ numeric_columns = fv.select_dtypes(include=[float, int]).columns
73
+ degree = 2
74
+ poly = PolynomialFeatures(degree=degree, include_bias=False)
75
+ poly_features = poly.fit_transform(fv[numeric_columns])
76
+ poly_feature_names = poly.get_feature_names_out(numeric_columns)
77
+ poly_df = pd.DataFrame(poly_features, columns=poly_feature_names)
78
+ fv_with_poly = pd.concat([fv.reset_index(drop=True), poly_df], axis=1)
79
+ fv_with_poly = fv_with_poly.loc[:, ~fv_with_poly.columns.duplicated()]
80
+ st.write(fv_with_poly.head())
81
+
82
+ # SMOTE for Handling Imbalanced Dataset
83
+ st.markdown("### SMOTE for Handling Imbalanced Dataset")
84
+ smote = SMOTE(sampling_strategy=1)
85
+ fv1, cv1 = smote.fit_resample(fv_with_poly, cv)
86
+ st.write(pd.Series(cv1).value_counts())
87
 
88
+ # Data Splitting
89
+ st.markdown("### Data Splitting")
90
+ x_train, x_test, y_train, y_test = train_test_split(fv1, cv1, test_size=0.2, random_state=42)
91
 
92
+ # Scaling
93
+ st.markdown("### Scaling")
94
+ std = StandardScaler()
95
+ x_train_std = std.fit_transform(x_train)
96
+ x_test_std = std.transform(x_test)
97
 
98
+ st.markdown("## Hyperparameter Tuning using OPTUNA")
99
+
100
+ # Define the objective function for Optuna
101
+ st.code("""
102
+ def objective(trial):
103
+ # Choose algorithm
104
+ algo = trial.suggest_categorical("algo", ["lor", "svc"])
105
+
106
+ if algo == "svc":
107
+ # SVC hyperparameters
108
+ c = trial.suggest_float("C", 0.001, 1000, log=True)
109
+ kernel = trial.suggest_categorical("kernel", ['linear', 'poly', 'rbf', 'sigmoid'])
110
+
111
+ if kernel == 'poly':
112
+ degree = trial.suggest_int("degree", 1, 3)
113
+ model = SVC(C=c, kernel=kernel, degree=degree, random_state=42)
114
+ elif kernel in ['rbf', 'sigmoid']:
115
+ gamma = trial.suggest_categorical("gamma", ['scale', 'auto'])
116
+ model = SVC(C=c, kernel=kernel, gamma=gamma, random_state=42)
117
+ else:
118
+ model = SVC(C=c, kernel=kernel, random_state=42)
119
+ else:
120
+ # Logistic Regression hyperparameters
121
+ solver, penalty = trial.suggest_categorical(
122
+ "choices", [
123
+ ("lbfgs", "l2"), ("newton-cg", "l2"),
124
+ ("sag", "l2"), ("saga", "l1"),
125
+ ("saga", "l2"), ("saga", "elasticnet")
126
+ ]
127
+ )
128
+ reg_strength = trial.suggest_float("C", 0.001, 1000, log=True)
129
+ l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
130
+
131
+ if penalty == "elasticnet":
132
+ model = LogisticRegression(
133
+ solver=solver, penalty=penalty, C=reg_strength,
134
+ l1_ratio=l1_ratio, random_state=42
135
+ )
136
+ else:
137
+ model = LogisticRegression(
138
+ solver=solver, penalty=penalty, C=reg_strength, random_state=42
139
+ )
140
+
141
+ # Perform cross-validation and return the mean score
142
+ score = cross_val_score(model, x_train_std, y_train, cv=5, scoring="accuracy").mean()
143
+ return score
144
+ """, language="python")
145
+
146
+ # Create and optimize the study
147
+ st.code("""
148
+ study = optuna.create_study(direction="maximize")
149
+ study.optimize(objective, n_trials=100)
150
+
151
+ # Display the best parameters
152
+ st.write("Best Parameters:", study.best_params)
153
+ """, language="python")
154
+
155
+
156
+ # Create the best model
157
+ st.markdown("## Create the Best Model")
158
+ model = SVC(kernel='rbf', gamma='scale', C=53.123097332514455)
159
+ st.write(model)
160
+
161
+ # Train the model
162
+ st.markdown("### Train the Model")
163
+ model.fit(x_train_std, y_train)
164
+
165
+ # Model Evaluation
166
+ st.markdown("# Model Evaluation")
167
+ y_pred = model.predict(x_test_std)
168
+ st.write("Accuracy:", accuracy_score(y_test, y_pred))
169
+ st.write("Classification Report:\n", classification_report(y_test, y_pred))
170
+ st.write("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
171
+
172
+ else:
173
+ st.warning("No Dataset Found")
174
+
175
+ # Custom background styling
176
  st.markdown(
177
  """
178
  <style>
 
180
  background-image: url("https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/FVcAdQ1wc7rCkfdnFsZft.jpeg");
181
  background-size: cover;
182
  background-position: center;
 
183
  }
 
 
184
  .stApp::before {
185
  content: "";
186
  position: absolute;
 
188
  left: 0;
189
  width: 100%;
190
  height: 100%;
191
+ background: rgba(0, 0, 0, 0.4);
192
  z-index: -1;
193
  }
194
  </style>
195
+ """,
196
  unsafe_allow_html=True
197
+ )