pavanmutha committed on
Commit
c70b4e1
·
verified ·
1 Parent(s): d1a62b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -18
app.py CHANGED
@@ -14,10 +14,11 @@ import shutil
14
  import ast
15
  from smolagents import HfApiModel, CodeAgent
16
  from huggingface_hub import login
17
- from sklearn.ensemble import RandomForestClassifier
18
  from sklearn.model_selection import train_test_split, cross_val_score
19
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
20
  from sklearn.metrics import ConfusionMatrixDisplay
 
 
21
  from sklearn.preprocessing import LabelEncoder
22
  from PIL import Image
23
 
@@ -189,30 +190,62 @@ def train_model(_):
189
  wandb_run = wandb.init(project="huggingface-data-analysis", name=f"Optuna_Run_{run_counter}", reinit=True)
190
  run_counter += 1
191
 
 
 
 
 
 
 
 
192
  def prepare_data():
193
- """Prepares the dataset by splitting into X and y, and returns training and test sets."""
194
- global df_global, X_train, X_test, y_train, y_test
195
-
196
- # Check if df_global is None, which means no file has been uploaded yet
197
  if df_global is None:
198
- raise ValueError("DataFrame is None. Please upload a dataset first.")
199
 
200
  target = df_global.columns[-1]
201
- X = df_global.drop(target, axis=1)
202
  y = df_global[target]
203
 
204
- if y.dtype == 'object':
205
- y = LabelEncoder().fit_transform(y)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
208
- return X_train, X_test, y_train, y_test
209
-
210
- # Prepare the data before the optimization process, with a check for df_global
211
- try:
212
- X_train, X_test, y_train, y_test = prepare_data()
213
- except ValueError as e:
214
- print(e) # You can log this or return it as a message in the UI
215
- # Handle the error by returning or setting defaults as needed.
216
 
217
 
218
  def objective(trial):
 
14
  import ast
15
  from smolagents import HfApiModel, CodeAgent
16
  from huggingface_hub import login
 
17
  from sklearn.model_selection import train_test_split, cross_val_score
18
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
19
  from sklearn.metrics import ConfusionMatrixDisplay
20
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
21
+ from sklearn.linear_model import LogisticRegression
22
  from sklearn.preprocessing import LabelEncoder
23
  from PIL import Image
24
 
 
190
  wandb_run = wandb.init(project="huggingface-data-analysis", name=f"Optuna_Run_{run_counter}", reinit=True)
191
  run_counter += 1
192
 
193
+ import optuna
194
+ from sklearn.model_selection import train_test_split, cross_val_score
195
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
196
+ from sklearn.linear_model import LogisticRegression
197
+ import pandas as pd
198
+
199
+
200
def prepare_data():
    """Split the uploaded dataset into train/test sets.

    Uses the last column of the module-level ``df_global`` DataFrame as the
    classification target and every other column as features.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` from an 80/20 split,
        seeded with ``random_state=42`` for reproducibility.

    Raises:
        ValueError: If no dataset has been uploaded yet (``df_global`` is None).
    """
    global df_global

    if df_global is None:
        raise ValueError("No dataset uploaded.")

    # App convention: the target is the last column of the uploaded file.
    target = df_global.columns[-1]
    X = df_global.iloc[:, :-1]
    y = df_global[target]

    # Restore the label-encoding step a prior revision applied for string
    # targets; LabelEncoder is still imported at module level, and numeric
    # labels keep all three model families (incl. LogisticRegression) happy.
    if y.dtype == object:
        y = LabelEncoder().fit_transform(y)

    return train_test_split(X, y, test_size=0.2, random_state=42)
210
+
211
def make_objective(X_train, y_train):
    """Build an Optuna objective closed over the given training data.

    The returned callable samples a model family plus its hyperparameters
    from the trial and scores the candidate with 3-fold cross-validation.
    """

    def objective(trial):
        chosen = trial.suggest_categorical(
            "model_type", ["RandomForest", "GradientBoosting", "LogisticRegression"]
        )

        if chosen == "RandomForest":
            estimator = RandomForestClassifier(
                n_estimators=trial.suggest_int("n_estimators", 50, 300),
                max_depth=trial.suggest_int("max_depth", 2, 32),
            )
        elif chosen == "GradientBoosting":
            estimator = GradientBoostingClassifier(
                n_estimators=trial.suggest_int("n_estimators", 50, 300),
                learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
                max_depth=trial.suggest_int("max_depth", 2, 32),
            )
        else:
            estimator = LogisticRegression(
                C=trial.suggest_float("C", 1e-3, 1e2),
                solver="liblinear",
            )

        # Mean CV accuracy is the value Optuna maximizes.
        return cross_val_score(estimator, X_train, y_train, cv=3).mean()

    return objective
236
+
237
# Prepare data and run hyperparameter optimization at module load.
# Guard against the "no dataset uploaded yet" case so that importing the
# app does not crash with an uncaught ValueError before the user has
# provided a file (a prior revision had this guard; the rewrite lost it).
try:
    X_train, X_test, y_train, y_test = prepare_data()
    objective = make_objective(X_train, y_train)  # close over the train split

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=15)

    print("Best trial:")
    print(study.best_trial)
except ValueError as exc:
    # Surface the problem without killing the app; optimization can run
    # again once a dataset is actually available.
    print(exc)
248
 
 
 
 
 
 
 
 
 
 
249
 
250
 
251
  def objective(trial):