opinder2906 committed on
Commit
55a920e
·
verified ·
1 Parent(s): b62d0ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -298
app.py CHANGED
@@ -1,343 +1,146 @@
1
- # -*- coding: utf-8 -*-
2
- """Try.ipynb
3
 
4
- Automatically generated by Colab.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1OBe8cQMTtii9Xh1Ak5ayDewo_4UvTSD-
8
- """
9
-
10
- # Step 1: Imports & Data Load
11
  import pandas as pd
12
  import numpy as np
13
  import seaborn as sns
14
  import matplotlib.pyplot as plt
15
 
16
- from sklearn.model_selection import train_test_split, StratifiedShuffleSplit, GridSearchCV
17
- from sklearn.preprocessing import LabelEncoder, StandardScaler
18
  from sklearn.impute import SimpleImputer
19
  from sklearn.decomposition import PCA
20
  from sklearn.manifold import TSNE
21
- from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier # Added RandomForestClassifier here
 
 
 
 
 
 
 
 
 
 
22
 
23
- print("\n1. DATA LOADING & INITIAL INSPECTION …………………………………………")
 
 
 
 
24
 
25
- url = "https://drive.google.com/uc?export=download&id=1QBTnXxORRbJzE5Z2aqKHsVqgB7mqowiN"
26
- df = pd.read_csv(url)
27
- print(df.head(3))
28
- print("Shape:", df.shape)
29
 
30
- # Check nulls
31
- print(df.isna().sum())
32
- # Fill object columns with mode, number columns with median
33
  for col in df.select_dtypes(include='object').columns:
34
  df[col] = df[col].fillna(df[col].mode()[0])
35
  for col in df.select_dtypes(include=np.number).columns:
36
  df[col] = df[col].fillna(df[col].median())
37
 
38
- # Outlier removal (IQR method, numeric columns)
39
- num_cols = df.select_dtypes(include=np.number).columns
40
- Q1 = df[num_cols].quantile(0.25)
41
- Q3 = df[num_cols].quantile(0.75)
42
  IQR = Q3 - Q1
43
- mask = ~((df[num_cols] < (Q1 - 1.5 * IQR)) | (df[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)
44
- df = df[mask]
45
 
46
- # Encode categorical columns
47
- from sklearn.preprocessing import LabelEncoder
48
  cat_cols = df.select_dtypes(include='object').columns
49
- le_dict = {}
50
  for col in cat_cols:
51
  le = LabelEncoder()
52
  df[col] = le.fit_transform(df[col])
53
- le_dict[col] = le # Save for later decoding if needed
54
-
55
- print(df.head())
56
-
57
- # Univariate analysis: Numeric
58
- num_cols = df.select_dtypes(include=['int64', 'float64']).columns
59
- for col in num_cols:
60
- plt.figure(figsize=(6,3))
61
- sns.histplot(df[col].dropna(), kde=True)
62
- plt.title(f'Distribution of {col}')
63
- plt.show()
64
-
65
- if 'Make' in df.columns and 'Electric Range' in df.columns:
66
- plt.figure(figsize=(12,6))
67
- sns.boxplot(x='Make', y='Electric Range', data=df)
68
- plt.xticks(rotation=90)
69
- plt.title('Electric Range by Make')
70
- plt.show()
71
-
72
- # Pairplot of main variables (sample for large datasets)
73
- sample_df = df.sample(min(1000, len(df)), random_state=42)
74
- if len(num_cols) > 1:
75
- sns.pairplot(sample_df[num_cols])
76
- plt.suptitle('Pairplot of Numeric Features', y=1.02)
77
- plt.show()
78
-
79
- import matplotlib.pyplot as plt
80
- import seaborn as sns
81
-
82
- # Assume df is already loaded
83
- num_cols = df.select_dtypes(include=['int64', 'float64']).columns
84
- corr = df[num_cols].corr()
85
-
86
- plt.figure(figsize=(10, 7))
87
- sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm')
88
- plt.title('Correlation Heatmap for Numeric Columns')
89
- plt.show()
90
 
91
- from sklearn.preprocessing import StandardScaler
92
- from sklearn.ensemble import RandomForestClassifier
93
-
94
- # Example new feature: Vehicle Age (if 'Model Year' exists)
95
  if 'Model Year' in df.columns:
96
  df['Vehicle_Age'] = 2025 - df['Model Year']
97
 
98
- # Scaling
99
- scaler = StandardScaler()
100
- X_scaled = scaler.fit_transform(df.drop('Electric Range', axis=1)) # Assume Electric Range is your target
101
-
102
- # Feature Selection (Random Forest importance)
103
- y = (df['Electric Range'] > df['Electric Range'].median()).astype(int) # Binary target
104
- rf_fs = RandomForestClassifier(n_estimators=100, random_state=42)
105
- rf_fs.fit(X_scaled, y)
106
- importances = rf_fs.feature_importances_
107
- top_idx = np.argsort(importances)[::-1][:10]
108
- top_features = df.drop('Electric Range', axis=1).columns[top_idx]
109
- print("Top features:", top_features)
110
-
111
- # Feature extraction (PCA)
112
- from sklearn.decomposition import PCA
113
- pca = PCA(n_components=2, random_state=42)
114
- X_pca = pca.fit_transform(df[top_features])
115
-
116
- import matplotlib.pyplot as plt
117
- plt.figure(figsize=(7,5))
118
- plt.scatter(X_pca[:,0], X_pca[:,1], c=y, cmap='viridis', alpha=0.5)
119
- plt.title("PCA of Top Features")
120
- plt.xlabel("PC1")
121
- plt.ylabel("PC2")
122
- plt.show()
123
-
124
- from sklearn.model_selection import train_test_split
125
-
126
- # Subsample (optional, for balanced classes)
127
- df_balanced = df.groupby(y).apply(lambda x: x.sample(min(len(x), 300), random_state=42)).reset_index(drop=True)
128
- X = df_balanced[top_features]
129
- y_bal = (df_balanced['Electric Range'] > df_balanced['Electric Range'].median()).astype(int)
130
- X_train, X_test, y_train, y_test = train_test_split(X, y_bal, test_size=0.3, random_state=42, stratify=y_bal)
131
-
132
- from sklearn.decomposition import PCA
133
- from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
134
- import matplotlib.pyplot as plt
135
- import seaborn as sns
136
-
137
- # Apply PCA
138
- pca = PCA(n_components=2)
139
- X_pca = pca.fit_transform(X_train)
140
-
141
- # Plot PCA results
142
- plt.figure(figsize=(8, 6))
143
- sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=y_train, palette='Set1', s=60)
144
- plt.title("PCA - First 2 Principal Components")
145
- plt.xlabel("PC1")
146
- plt.ylabel("PC2")
147
- plt.legend(title="Electric Vehicle Type") # Note: The legend title 'Cover_Type' might be a copy-paste error from another project. It should ideally reflect the actual target variable name if desired.
148
- plt.grid(True)
149
- plt.tight_layout()
150
- plt.show()
151
-
152
- # Apply LDA
153
- # Change n_components to 1 as max_components is min(n_features, n_classes - 1) = min(10, 2 - 1) = 1
154
- lda = LDA(n_components=1)
155
- X_lda = lda.fit_transform(X_train, y_train)
156
-
157
- # Plot LDA results
158
- plt.figure(figsize=(8, 6))
159
- # LDA with n_components=1 results in a 1D array. You typically plot this on a line or use a histogram.
160
- # Plotting against a dummy variable or the class label itself can show separation.
161
- # Here, we plot it on the x-axis against a constant y-value or jittered y-values for visualization.
162
- # A more informative plot might be a histogram of LD1 values for each class.
163
- sns.histplot(x=X_lda[:, 0], hue=y_train, kde=True, palette='Set2')
164
- plt.title("LDA - First Linear Discriminant")
165
- plt.xlabel("LD1")
166
- plt.ylabel("Density")
167
- plt.legend(title="Electric Vehicle Type")
168
- plt.grid(True)
169
- plt.tight_layout()
170
- plt.show()
171
 
172
- from sklearn.linear_model import LogisticRegression
173
- from sklearn.svm import SVC
174
- from sklearn.ensemble import GradientBoostingClassifier
175
- from sklearn.naive_bayes import GaussianNB
176
- from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, RocCurveDisplay
177
- import matplotlib.pyplot as plt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- # Store models and results
180
  models = {
181
- 'Logistic Regression': LogisticRegression(max_iter=1000, penalty='l2', random_state=42),
182
- 'SVM': SVC(kernel='rbf', C=1.0, probability=True, random_state=42),
183
- 'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
184
  'Naive Bayes': GaussianNB()
185
  }
186
 
187
  for name, model in models.items():
188
  model.fit(X_train, y_train)
189
  y_pred = model.predict(X_test)
190
- print(f"\n===== {name} =====")
191
- print(classification_report(y_test, y_pred))
192
- print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
193
- # ROC-AUC and curve if possible
 
194
  if hasattr(model, "predict_proba"):
195
- proba = model.predict_proba(X_test)[:, 1]
196
- auc = roc_auc_score(y_test, proba)
197
- print("ROC-AUC:", auc)
198
  RocCurveDisplay.from_estimator(model, X_test, y_test)
199
- plt.title(f"{name} ROC Curve")
200
- plt.show()
201
- else:
202
- print("ROC-AUC not available for this model.")
203
-
204
- # Gradient Boosting with Binning
205
- from sklearn.preprocessing import KBinsDiscretizer
206
 
207
- binning = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile')
208
- X_train_binned = binning.fit_transform(X_train)
209
- X_test_binned = binning.transform(X_test)
210
- gbc_bin = GradientBoostingClassifier()
211
- gbc_bin.fit(X_train_binned, y_train)
212
- y_pred_gbc_bin = gbc_bin.predict(X_test_binned)
213
- print("Gradient Boosting (Optimal Binning) Results:\n", classification_report(y_test, y_pred_gbc_bin))
214
- print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_gbc_bin))
215
 
216
- from sklearn.metrics import roc_auc_score, RocCurveDisplay
217
- import matplotlib.pyplot as plt
218
-
219
- models_to_plot = {
220
- 'NB': models['Naive Bayes'],
221
- 'LR': models['Logistic Regression'],
222
- 'SVM': models['SVM'],
223
- 'GBC': models['Gradient Boosting'],
224
- 'GBC_bin': gbc_bin # gbc_bin was defined in the previous cell (ipython-input-11)
225
- }
226
-
227
- for name, model in models_to_plot.items():
228
- if hasattr(model, "predict_proba"):
229
- RocCurveDisplay.from_estimator(model, X_test, y_test)
230
- plt.title(name + " ROC Curve")
231
- plt.show()
232
- print(f"{name} ROC-AUC:", roc_auc_score(y_test, model.predict_proba(X_test)[:,1]))
233
- elif hasattr(model, "decision_function"):
234
- RocCurveDisplay.from_estimator(model, X_test, y_test)
235
- plt.title(name + " ROC Curve")
236
- plt.show()
237
-
238
- from sklearn.model_selection import RandomizedSearchCV
239
- from sklearn.ensemble import GradientBoostingClassifier
240
- from sklearn.linear_model import LogisticRegression
241
- from sklearn.svm import SVC
242
- from sklearn.naive_bayes import GaussianNB
243
- from scipy.stats import uniform, randint
244
 
245
- # Use a smaller subset for tuning (optional, but helps)
246
- X_sample = X_train.sample(n=min(2000, len(X_train)), random_state=42)
247
- y_sample = y_train.loc[X_sample.index]
248
 
249
- # Parameter distributions
250
- param_dist_lr = {
251
- 'C': uniform(0.01, 10),
252
- 'penalty': ['l2'],
253
- 'solver': ['lbfgs']
254
- }
255
- param_dist_svm = {
256
- 'C': uniform(0.1, 10)
257
- }
258
- param_dist_gbc = {
259
- 'n_estimators': randint(50, 200),
260
- 'learning_rate': uniform(0.01, 0.2),
261
- 'max_depth': randint(3, 7)
262
- }
263
- param_dist_nb = {}
264
-
265
- n_iter_search = 10 # Try 10 random combinations per model
266
-
267
- # Logistic Regression
268
- rs_lr = RandomizedSearchCV(
269
- LogisticRegression(max_iter=1000, random_state=42),
270
- param_distributions=param_dist_lr,
271
- n_iter=n_iter_search, cv=3, scoring='accuracy', n_jobs=-1, random_state=42)
272
- rs_lr.fit(X_sample, y_sample)
273
- print("Best Logistic Regression params:", rs_lr.best_params_)
274
-
275
- # SVM
276
-
277
- # Run randomized search for SVM
278
- # The original code defines rs_svm_linear but never fits it and then tries to access rs_svm.best_estimator_
279
- # Let's assume the user intended to run RandomizedSearchCV for the general SVM param_dist_svm
280
- rs_svm = RandomizedSearchCV(
281
- SVC(random_state=42, max_iter=5000),
282
- param_distributions=param_dist_svm, # Use the general SVM parameter distribution
283
- n_iter=5, # Use n_iter_search for consistency
284
- cv=2,
285
- scoring='accuracy',
286
- n_jobs=-1,
287
- random_state=42
288
- )
289
- rs_svm.fit(X_sample, y_sample) # Fit the SVM RandomizedSearchCV
290
- print("Best SVM params:", rs_svm.best_params_)
291
-
292
-
293
- # Gradient Boosting
294
- rs_gbc = RandomizedSearchCV(
295
- # Removed n_bins, encode, strategy as they are not arguments for GBC
296
- GradientBoostingClassifier(random_state = 42),
297
- param_distributions=param_dist_gbc,
298
- n_iter=n_iter_search, cv=3, scoring='accuracy', n_jobs=-1, random_state=42)
299
- rs_gbc.fit(X_sample, y_sample)
300
- print("Best Gradient Boosting params:", rs_gbc.best_params_)
301
-
302
- # Naive Bayes (no real params, but for consistency)
303
- rs_nb = RandomizedSearchCV(
304
- GaussianNB(), param_distributions=param_dist_nb,
305
- n_iter=1, cv=3, scoring='accuracy', random_state=42)
306
- rs_nb.fit(X_sample, y_sample)
307
- print("Best Naive Bayes params:", rs_nb.best_params_) # Print best params for NB as well
308
-
309
- # Evaluate best estimators on full test set
310
- print("\n--- Test Set Evaluation ---")
311
- print("LR Test Accuracy:", rs_lr.best_estimator_.score(X_test, y_test))
312
- print("SVM Test Accuracy:", rs_svm.best_estimator_.score(X_test, y_test)) # Use rs_svm
313
- print("GBC Test Accuracy:", rs_gbc.best_estimator_.score(X_test, y_test))
314
- print("NB Test Accuracy:", rs_nb.best_estimator_.score(X_test, y_test))
315
-
316
- from sklearn.decomposition import PCA
317
- import matplotlib.pyplot as plt
318
-
319
- pca = PCA(n_components=2, random_state=42)
320
- X_pca = pca.fit_transform(X)
321
-
322
- plt.figure(figsize=(8,6))
323
- plt.scatter(X_pca[:,0], X_pca[:,1], c=y_bal, cmap='coolwarm', alpha=0.6)
324
- plt.title("PCA Projection of Data")
325
- plt.xlabel("Principal Component 1")
326
- plt.ylabel("Principal Component 2")
327
- plt.colorbar(label='Class')
328
- plt.show()
329
-
330
- from sklearn.manifold import TSNE
331
 
332
- # t-SNE on top features
333
- tsne = TSNE(n_components=2, random_state=42)
334
- X_tsne = tsne.fit_transform(X)
335
 
336
- plt.figure(figsize=(8,5))
337
- # Use y_bal for coloring as it corresponds to the subsampled data X
338
- plt.scatter(X_tsne[:,0], X_tsne[:,1], c=y_bal, cmap='plasma', alpha=0.7)
339
- plt.title("t-SNE of Features")
340
- plt.xlabel("t-SNE1")
341
- plt.ylabel("t-SNE2")
342
- plt.show()
343
 
 
 
 
 
 
 
1
 
2
+ import streamlit as st
 
 
 
 
 
 
3
  import pandas as pd
4
  import numpy as np
5
  import seaborn as sns
6
  import matplotlib.pyplot as plt
7
 
8
+ from sklearn.model_selection import train_test_split, RandomizedSearchCV
9
+ from sklearn.preprocessing import LabelEncoder, StandardScaler, KBinsDiscretizer
10
  from sklearn.impute import SimpleImputer
11
  from sklearn.decomposition import PCA
12
  from sklearn.manifold import TSNE
13
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
14
+ from sklearn.linear_model import LogisticRegression
15
+ from sklearn.naive_bayes import GaussianNB
16
+ from sklearn.svm import SVC
17
+ from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, RocCurveDisplay
18
+ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
19
+ from scipy.stats import uniform, randint
20
+
21
+ st.set_option('deprecation.showPyplotGlobalUse', False)
22
+
23
+ st.title("Electric Vehicle ML Pipeline Dashboard")
24
 
25
+ # Load dataset
26
+ @st.cache_data
27
+ def load_data():
28
+ url = "https://drive.google.com/uc?export=download&id=1QBTnXxORRbJzE5Z2aqKHsVqgB7mqowiN"
29
+ return pd.read_csv(url)
30
 
31
+ df = load_data()
32
+ st.subheader("1. Dataset Preview")
33
+ st.write(df.head())
 
34
 
35
+ # Fill missing values
 
 
36
  for col in df.select_dtypes(include='object').columns:
37
  df[col] = df[col].fillna(df[col].mode()[0])
38
  for col in df.select_dtypes(include=np.number).columns:
39
  df[col] = df[col].fillna(df[col].median())
40
 
41
+ # Outlier Removal
42
+ Q1 = df.quantile(0.25)
43
+ Q3 = df.quantile(0.75)
 
44
  IQR = Q3 - Q1
45
+ df = df[~((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)]
 
46
 
47
+ # Encoding
 
48
  cat_cols = df.select_dtypes(include='object').columns
 
49
  for col in cat_cols:
50
  le = LabelEncoder()
51
  df[col] = le.fit_transform(df[col])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ # Feature Engineering
 
 
 
54
  if 'Model Year' in df.columns:
55
  df['Vehicle_Age'] = 2025 - df['Model Year']
56
 
57
+ # Modeling Prep
58
+ target = 'Electric Range'
59
+ y = (df[target] > df[target].median()).astype(int)
60
+ X = df.drop(columns=[target])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ # Feature Selection
63
+ scaler = StandardScaler()
64
+ X_scaled = scaler.fit_transform(X)
65
+ rf = RandomForestClassifier(random_state=42)
66
+ rf.fit(X_scaled, y)
67
+ top_features = pd.Series(rf.feature_importances_, index=X.columns).nlargest(10).index.tolist()
68
+ X = df[top_features]
69
+
70
+ # Subsample for balance
71
+ df['Target'] = y
72
+ df_bal = df.groupby('Target').apply(lambda x: x.sample(min(len(x), 300), random_state=42)).reset_index(drop=True)
73
+ X = df_bal[top_features]
74
+ y = df_bal['Target']
75
+ X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)
76
+
77
+ # Visualization
78
+ st.subheader("2. Data Visualization")
79
+
80
+ if st.checkbox("Show Correlation Heatmap"):
81
+ plt.figure(figsize=(10, 6))
82
+ sns.heatmap(df[top_features + ['Target']].corr(), annot=True, cmap='coolwarm')
83
+ st.pyplot()
84
+
85
+ if st.checkbox("Show PCA Plot"):
86
+ pca = PCA(n_components=2)
87
+ X_pca = pca.fit_transform(X)
88
+ plt.figure(figsize=(8, 5))
89
+ plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis', alpha=0.6)
90
+ plt.title("PCA Projection")
91
+ st.pyplot()
92
+
93
+ if st.checkbox("Show t-SNE Plot"):
94
+ tsne = TSNE(n_components=2, random_state=42)
95
+ X_tsne = tsne.fit_transform(X)
96
+ plt.figure(figsize=(8, 5))
97
+ plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='plasma', alpha=0.7)
98
+ plt.title("t-SNE Projection")
99
+ st.pyplot()
100
+
101
+ # Model Training
102
+ st.subheader("3. Model Training & Evaluation")
103
 
 
104
  models = {
105
+ 'Logistic Regression': LogisticRegression(max_iter=1000),
106
+ 'SVM': SVC(probability=True),
107
+ 'Gradient Boosting': GradientBoostingClassifier(),
108
  'Naive Bayes': GaussianNB()
109
  }
110
 
111
  for name, model in models.items():
112
  model.fit(X_train, y_train)
113
  y_pred = model.predict(X_test)
114
+ st.write(f"### {name}")
115
+ st.text("Classification Report")
116
+ st.text(classification_report(y_test, y_pred))
117
+ st.text("Confusion Matrix")
118
+ st.write(confusion_matrix(y_test, y_pred))
119
  if hasattr(model, "predict_proba"):
 
 
 
120
  RocCurveDisplay.from_estimator(model, X_test, y_test)
121
+ st.pyplot()
 
 
 
 
 
 
122
 
123
+ # Hyperparameter Tuning
124
+ st.subheader("4. Hyperparameter Tuning Summary")
 
 
 
 
 
 
125
 
126
+ if st.checkbox("Run Tuning"):
127
+ st.info("Running tuning... may take a few minutes")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
+ param_dist_lr = {'C': uniform(0.01, 10), 'penalty': ['l2'], 'solver': ['lbfgs']}
130
+ param_dist_svm = {'C': uniform(0.1, 10)}
131
+ param_dist_gbc = {'n_estimators': randint(50, 150), 'learning_rate': uniform(0.01, 0.2), 'max_depth': randint(3, 6)}
132
 
133
+ sample_X = X_train.sample(min(1000, len(X_train)), random_state=42)
134
+ sample_y = y_train.loc[sample_X.index]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
+ rs_lr = RandomizedSearchCV(LogisticRegression(max_iter=1000), param_distributions=param_dist_lr, n_iter=10, cv=3)
137
+ rs_lr.fit(sample_X, sample_y)
138
+ st.write("Best Logistic Regression:", rs_lr.best_params_)
139
 
140
+ rs_svm = RandomizedSearchCV(SVC(probability=True), param_distributions=param_dist_svm, n_iter=5, cv=2)
141
+ rs_svm.fit(sample_X, sample_y)
142
+ st.write("Best SVM:", rs_svm.best_params_)
 
 
 
 
143
 
144
+ rs_gbc = RandomizedSearchCV(GradientBoostingClassifier(), param_distributions=param_dist_gbc, n_iter=10, cv=3)
145
+ rs_gbc.fit(sample_X, sample_y)
146
+ st.write("Best Gradient Boosting:", rs_gbc.best_params_)