Update app.py
Browse files
app.py
CHANGED
|
@@ -40,31 +40,15 @@ def clean_data(df):
|
|
| 40 |
df = df.fillna(df.mean(numeric_only=True))
|
| 41 |
return df
|
| 42 |
|
| 43 |
-
def upload_file(
|
| 44 |
-
"""
|
| 45 |
-
file_path comes in as a str because type="filepath".
|
| 46 |
-
We catch any error and return a tiny DataFrame with the message.
|
| 47 |
-
"""
|
| 48 |
global df_global
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
df = pd.read_excel(file_path)
|
| 57 |
-
else:
|
| 58 |
-
raise ValueError(f"Unsupported extension: {ext}")
|
| 59 |
-
|
| 60 |
-
df = clean_data(df)
|
| 61 |
-
df_global = df
|
| 62 |
-
return df.head()
|
| 63 |
-
|
| 64 |
-
except Exception as e:
|
| 65 |
-
# Return a 1×1 DataFrame so Gradio won't crash
|
| 66 |
-
return pd.DataFrame({"Error": [str(e)]})
|
| 67 |
-
|
| 68 |
|
| 69 |
def format_analysis_report(raw_output, visuals):
|
| 70 |
try:
|
|
@@ -268,70 +252,10 @@ def train_model(_):
|
|
| 268 |
print(f"Training Error: {e}")
|
| 269 |
return {}, pd.DataFrame()
|
| 270 |
|
| 271 |
-
def ab_test_models():
|
| 272 |
-
global df_global
|
| 273 |
-
if df_global is None:
|
| 274 |
-
return "⚠️ Please upload and preprocess a dataset first.", pd.DataFrame()
|
| 275 |
-
|
| 276 |
-
try:
|
| 277 |
-
# split off last column as target
|
| 278 |
-
target = df_global.columns[-1]
|
| 279 |
-
X = df_global.drop(columns=[target])
|
| 280 |
-
y = df_global[target]
|
| 281 |
-
if y.dtype == 'object':
|
| 282 |
-
y = LabelEncoder().fit_transform(y)
|
| 283 |
-
|
| 284 |
-
X_train, X_test, y_train, y_test = train_test_split(
|
| 285 |
-
X, y, test_size=0.3, random_state=42
|
| 286 |
-
)
|
| 287 |
-
|
| 288 |
-
models = {
|
| 289 |
-
"Random Forest": RandomForestClassifier(n_estimators=100),
|
| 290 |
-
"Logistic Regression": LogisticRegression(max_iter=1000),
|
| 291 |
-
"Gradient Boosting": GradientBoostingClassifier()
|
| 292 |
-
}
|
| 293 |
-
|
| 294 |
-
results = []
|
| 295 |
-
for name, clf in models.items():
|
| 296 |
-
clf.fit(X_train, y_train)
|
| 297 |
-
y_pred = clf.predict(X_test)
|
| 298 |
-
metrics = {
|
| 299 |
-
"Model": name,
|
| 300 |
-
"Accuracy": accuracy_score(y_test, y_pred),
|
| 301 |
-
"Precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
|
| 302 |
-
"Recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
|
| 303 |
-
"F1 Score": f1_score(y_test, y_pred, average="weighted", zero_division=0)
|
| 304 |
-
}
|
| 305 |
-
results.append(metrics)
|
| 306 |
-
|
| 307 |
-
# safe WandB logging
|
| 308 |
-
try:
|
| 309 |
-
wandb.log({f"{name}_metrics": metrics})
|
| 310 |
-
except Exception as e:
|
| 311 |
-
print(f"[WARN] WandB log failed for {name}: {e}")
|
| 312 |
-
|
| 313 |
-
result_df = pd.DataFrame(results)
|
| 314 |
-
best = result_df.sort_values("F1 Score", ascending=False).iloc[0]
|
| 315 |
-
|
| 316 |
-
summary = f"""
|
| 317 |
-
π <b>Best Model:</b> {best['Model']}<br>
|
| 318 |
-
✅ <b>F1 Score:</b> {best['F1 Score']:.4f}<br>
|
| 319 |
-
π <b>Accuracy:</b> {best['Accuracy']:.4f}<br>
|
| 320 |
-
π§ <b>Precision:</b> {best['Precision']:.4f}<br>
|
| 321 |
-
π <b>Recall:</b> {best['Recall']:.4f}
|
| 322 |
-
"""
|
| 323 |
-
return summary, result_df
|
| 324 |
-
|
| 325 |
-
except Exception as err:
|
| 326 |
-
return f"❌ Error during A/B testing:<br>{err}", pd.DataFrame()
|
| 327 |
-
|
| 328 |
|
| 329 |
def explainability(_):
|
| 330 |
-
global df_global
|
| 331 |
import warnings
|
| 332 |
warnings.filterwarnings("ignore")
|
| 333 |
-
if df_global is None:
|
| 334 |
-
return None, None
|
| 335 |
|
| 336 |
target = df_global.columns[-1]
|
| 337 |
X = df_global.drop(target, axis=1)
|
|
@@ -407,40 +331,29 @@ def explainability(_):
|
|
| 407 |
with gr.Blocks() as demo:
|
| 408 |
gr.Markdown("## π AI-Powered Data Analysis with Hyperparameter Optimization")
|
| 409 |
|
| 410 |
-
# ─────────────────────── Upload & Preview ───────────────────────
|
| 411 |
with gr.Row():
|
| 412 |
with gr.Column():
|
| 413 |
file_input = gr.File(label="Upload CSV or Excel", type="filepath")
|
| 414 |
-
df_output
|
| 415 |
-
file_input.change(fn=upload_file, inputs=
|
| 416 |
|
| 417 |
with gr.Column():
|
| 418 |
-
insights_output = gr.HTML(label="Insights")
|
| 419 |
-
|
| 420 |
-
agent_btn
|
| 421 |
|
| 422 |
-
# ─────────────────── Hyperopt + Trials ───────────────────
|
| 423 |
with gr.Row():
|
| 424 |
-
train_btn
|
| 425 |
-
metrics_output = gr.JSON(label="Metrics")
|
| 426 |
-
trials_output
|
| 427 |
|
| 428 |
-
# ─────────────────── Explainability ───────────────────
|
| 429 |
with gr.Row():
|
| 430 |
explain_btn = gr.Button("SHAP + LIME Explainability")
|
| 431 |
-
shap_img
|
| 432 |
-
lime_img
|
| 433 |
-
|
| 434 |
-
# ─────────────────── A/B Testing ───────────────────
|
| 435 |
-
with gr.Row():
|
| 436 |
-
ab_test_button = gr.Button("Run A/B Testing")
|
| 437 |
-
ab_summary = gr.HTML(label="A/B Test Summary")
|
| 438 |
-
ab_results = gr.DataFrame(label="A/B Test Results")
|
| 439 |
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
# explain_btn.click(fn=explainability, inputs=[], outputs=[shap_img, lime_img])
|
| 444 |
-
ab_test_button.click(fn=ab_test_models, inputs=[], outputs=[ab_summary, ab_results])
|
| 445 |
|
| 446 |
-
|
|
|
|
| 40 |
df = df.fillna(df.mean(numeric_only=True))
|
| 41 |
return df
|
| 42 |
|
| 43 |
+
def upload_file(file):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
global df_global
|
| 45 |
+
if file is None:
|
| 46 |
+
return pd.DataFrame({"Error": ["No file uploaded."]})
|
| 47 |
+
ext = os.path.splitext(file.name)[-1]
|
| 48 |
+
df = pd.read_csv(file.name) if ext == ".csv" else pd.read_excel(file.name)
|
| 49 |
+
df = clean_data(df)
|
| 50 |
+
df_global = df
|
| 51 |
+
return df.head()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
def format_analysis_report(raw_output, visuals):
|
| 54 |
try:
|
|
|
|
| 252 |
print(f"Training Error: {e}")
|
| 253 |
return {}, pd.DataFrame()
|
| 254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
def explainability(_):
|
|
|
|
| 257 |
import warnings
|
| 258 |
warnings.filterwarnings("ignore")
|
|
|
|
|
|
|
| 259 |
|
| 260 |
target = df_global.columns[-1]
|
| 261 |
X = df_global.drop(target, axis=1)
|
|
|
|
| 331 |
with gr.Blocks() as demo:
|
| 332 |
gr.Markdown("## π AI-Powered Data Analysis with Hyperparameter Optimization")
|
| 333 |
|
|
|
|
| 334 |
with gr.Row():
|
| 335 |
with gr.Column():
|
| 336 |
file_input = gr.File(label="Upload CSV or Excel", type="filepath")
|
| 337 |
+
df_output = gr.DataFrame(label="Cleaned Data Preview")
|
| 338 |
+
file_input.change(fn=upload_file, inputs=file_input, outputs=df_output)
|
| 339 |
|
| 340 |
with gr.Column():
|
| 341 |
+
insights_output = gr.HTML(label="Insights from SmolAgent")
|
| 342 |
+
visual_output = gr.Gallery(label="Visualizations (Auto-generated by Agent)", columns=2)
|
| 343 |
+
agent_btn = gr.Button("Run AI Agent (5 Insights + 5 Visualizations)")
|
| 344 |
|
|
|
|
| 345 |
with gr.Row():
|
| 346 |
+
train_btn = gr.Button("Train Model with Optuna + WandB")
|
| 347 |
+
metrics_output = gr.JSON(label="Performance Metrics")
|
| 348 |
+
trials_output = gr.DataFrame(label="Top 7 Hyperparameter Trials")
|
| 349 |
|
|
|
|
| 350 |
with gr.Row():
|
| 351 |
explain_btn = gr.Button("SHAP + LIME Explainability")
|
| 352 |
+
shap_img = gr.Image(label="SHAP Summary Plot")
|
| 353 |
+
lime_img = gr.Image(label="LIME Explanation")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
+
agent_btn.click(fn=analyze_data, inputs=[file_input], outputs=[insights_output, visual_output])
|
| 356 |
+
train_btn.click(fn=train_model, inputs=[file_input], outputs=[metrics_output, trials_output])
|
| 357 |
+
explain_btn.click(fn=explainability, inputs=[], outputs=[shap_img, lime_img])
|
|
|
|
|
|
|
| 358 |
|
| 359 |
+
demo.launch(debug=True)
|