pavanmutha committed on
Commit
c80e58f
·
verified ·
1 Parent(s): 7e1b7d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -194,19 +194,17 @@ def prepare_data(df, target_column=None):
194
  X = df.drop(columns=[target_column])
195
  y = df[target_column]
196
 
197
- return train_test_split(X, y, test_size=0.2, random_state=42)
198
 
199
 
200
  def train_model(_):
201
  try:
202
  wandb.login(key=os.environ.get("WANDB_API_KEY"))
203
- run_counter = 1
204
  wandb_run = wandb.init(
205
  project="huggingface-data-analysis",
206
- name=f"Optuna_Run_{run_counter}",
207
  reinit=True
208
  )
209
- run_counter += 1
210
 
211
  X_train, X_test, y_train, y_test = prepare_data(df_global)
212
 
@@ -215,7 +213,7 @@ def train_model(_):
215
  "n_estimators": trial.suggest_int("n_estimators", 50, 200),
216
  "max_depth": trial.suggest_int("max_depth", 3, 10),
217
  }
218
- model = RandomForestClassifier(**params)
219
  score = cross_val_score(model, X_train, y_train, cv=3).mean()
220
  wandb.log({**params, "cv_score": score})
221
  return score
@@ -224,9 +222,11 @@ def train_model(_):
224
  study.optimize(objective, n_trials=15)
225
 
226
  best_params = study.best_params
227
- model = RandomForestClassifier(**best_params)
228
  model.fit(X_train, y_train)
229
  y_pred = model.predict(X_test)
 
 
230
 
231
  metrics = {
232
  "accuracy": accuracy_score(y_test, y_pred),
 
194
  X = df.drop(columns=[target_column])
195
  y = df[target_column]
196
 
197
+ return train_test_split(X, y, test_size=0.3, random_state=42)
198
 
199
 
200
  def train_model(_):
201
  try:
202
  wandb.login(key=os.environ.get("WANDB_API_KEY"))
 
203
  wandb_run = wandb.init(
204
  project="huggingface-data-analysis",
205
+ name=f"Optuna_Run_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
206
  reinit=True
207
  )
 
208
 
209
  X_train, X_test, y_train, y_test = prepare_data(df_global)
210
 
 
213
  "n_estimators": trial.suggest_int("n_estimators", 50, 200),
214
  "max_depth": trial.suggest_int("max_depth", 3, 10),
215
  }
216
+ model = RandomForestClassifier()
217
  score = cross_val_score(model, X_train, y_train, cv=3).mean()
218
  wandb.log({**params, "cv_score": score})
219
  return score
 
222
  study.optimize(objective, n_trials=15)
223
 
224
  best_params = study.best_params
225
+ model = RandomForestClassifier()
226
  model.fit(X_train, y_train)
227
  y_pred = model.predict(X_test)
228
+ print(f"Trial {trial.number}, preds: {np.unique(y_pred, return_counts=True)}")
229
+
230
 
231
  metrics = {
232
  "accuracy": accuracy_score(y_test, y_pred),