Update app.py
Browse files
app.py
CHANGED
|
@@ -194,19 +194,17 @@ def prepare_data(df, target_column=None):
|
|
| 194 |
X = df.drop(columns=[target_column])
|
| 195 |
y = df[target_column]
|
| 196 |
|
| 197 |
-
return train_test_split(X, y, test_size=0.
|
| 198 |
|
| 199 |
|
| 200 |
def train_model(_):
|
| 201 |
try:
|
| 202 |
wandb.login(key=os.environ.get("WANDB_API_KEY"))
|
| 203 |
-
run_counter = 1
|
| 204 |
wandb_run = wandb.init(
|
| 205 |
project="huggingface-data-analysis",
|
| 206 |
-
name=f"Optuna_Run_{run_counter}",
|
| 207 |
reinit=True
|
| 208 |
)
|
| 209 |
-
run_counter += 1
|
| 210 |
|
| 211 |
X_train, X_test, y_train, y_test = prepare_data(df_global)
|
| 212 |
|
|
@@ -215,7 +213,7 @@ def train_model(_):
|
|
| 215 |
"n_estimators": trial.suggest_int("n_estimators", 50, 200),
|
| 216 |
"max_depth": trial.suggest_int("max_depth", 3, 10),
|
| 217 |
}
|
| 218 |
-
model = RandomForestClassifier(
|
| 219 |
score = cross_val_score(model, X_train, y_train, cv=3).mean()
|
| 220 |
wandb.log({**params, "cv_score": score})
|
| 221 |
return score
|
|
@@ -224,9 +222,11 @@ def train_model(_):
|
|
| 224 |
study.optimize(objective, n_trials=15)
|
| 225 |
|
| 226 |
best_params = study.best_params
|
| 227 |
-
model = RandomForestClassifier(
|
| 228 |
model.fit(X_train, y_train)
|
| 229 |
y_pred = model.predict(X_test)
|
|
|
|
|
|
|
| 230 |
|
| 231 |
metrics = {
|
| 232 |
"accuracy": accuracy_score(y_test, y_pred),
|
|
|
|
| 194 |
X = df.drop(columns=[target_column])
|
| 195 |
y = df[target_column]
|
| 196 |
|
| 197 |
+
return train_test_split(X, y, test_size=0.3, random_state=42)
|
| 198 |
|
| 199 |
|
| 200 |
def train_model(_):
|
| 201 |
try:
|
| 202 |
wandb.login(key=os.environ.get("WANDB_API_KEY"))
|
|
|
|
| 203 |
wandb_run = wandb.init(
|
| 204 |
project="huggingface-data-analysis",
|
| 205 |
+
name=f"Optuna_Run_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
|
| 206 |
reinit=True
|
| 207 |
)
|
|
|
|
| 208 |
|
| 209 |
X_train, X_test, y_train, y_test = prepare_data(df_global)
|
| 210 |
|
|
|
|
| 213 |
"n_estimators": trial.suggest_int("n_estimators", 50, 200),
|
| 214 |
"max_depth": trial.suggest_int("max_depth", 3, 10),
|
| 215 |
}
|
| 216 |
+
model = RandomForestClassifier()
|
| 217 |
score = cross_val_score(model, X_train, y_train, cv=3).mean()
|
| 218 |
wandb.log({**params, "cv_score": score})
|
| 219 |
return score
|
|
|
|
| 222 |
study.optimize(objective, n_trials=15)
|
| 223 |
|
| 224 |
best_params = study.best_params
|
| 225 |
+
model = RandomForestClassifier()
|
| 226 |
model.fit(X_train, y_train)
|
| 227 |
y_pred = model.predict(X_test)
|
| 228 |
+
print(f"Trial {trial.number}, preds: {np.unique(y_pred, return_counts=True)}")
|
| 229 |
+
|
| 230 |
|
| 231 |
metrics = {
|
| 232 |
"accuracy": accuracy_score(y_test, y_pred),
|