pavanmutha committed on
Commit
b31f11f
·
verified ·
1 Parent(s): d6120a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -109
app.py CHANGED
@@ -40,31 +40,15 @@ def clean_data(df):
40
  df = df.fillna(df.mean(numeric_only=True))
41
  return df
42
 
43
- def upload_file(file_path):
44
- """
45
- file_path comes in as a str because type="filepath".
46
- We catch any error and return a tiny DataFrame with the message.
47
- """
48
  global df_global
49
- try:
50
- if not file_path:
51
- raise ValueError("No file uploaded.")
52
- ext = os.path.splitext(file_path)[-1].lower()
53
- if ext == ".csv":
54
- df = pd.read_csv(file_path)
55
- elif ext in {".xls", ".xlsx"}:
56
- df = pd.read_excel(file_path)
57
- else:
58
- raise ValueError(f"Unsupported extension: {ext}")
59
-
60
- df = clean_data(df)
61
- df_global = df
62
- return df.head()
63
-
64
- except Exception as e:
65
- # Return a 1×1 DataFrame so Gradio won't crash
66
- return pd.DataFrame({"Error": [str(e)]})
67
-
68
 
69
  def format_analysis_report(raw_output, visuals):
70
  try:
@@ -268,70 +252,10 @@ def train_model(_):
268
  print(f"Training Error: {e}")
269
  return {}, pd.DataFrame()
270
 
271
- def ab_test_models():
272
- global df_global
273
- if df_global is None:
274
- return "⚠️ Please upload and preprocess a dataset first.", pd.DataFrame()
275
-
276
- try:
277
- # split off last column as target
278
- target = df_global.columns[-1]
279
- X = df_global.drop(columns=[target])
280
- y = df_global[target]
281
- if y.dtype == 'object':
282
- y = LabelEncoder().fit_transform(y)
283
-
284
- X_train, X_test, y_train, y_test = train_test_split(
285
- X, y, test_size=0.3, random_state=42
286
- )
287
-
288
- models = {
289
- "Random Forest": RandomForestClassifier(n_estimators=100),
290
- "Logistic Regression": LogisticRegression(max_iter=1000),
291
- "Gradient Boosting": GradientBoostingClassifier()
292
- }
293
-
294
- results = []
295
- for name, clf in models.items():
296
- clf.fit(X_train, y_train)
297
- y_pred = clf.predict(X_test)
298
- metrics = {
299
- "Model": name,
300
- "Accuracy": accuracy_score(y_test, y_pred),
301
- "Precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
302
- "Recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
303
- "F1 Score": f1_score(y_test, y_pred, average="weighted", zero_division=0)
304
- }
305
- results.append(metrics)
306
-
307
- # safe WandB logging
308
- try:
309
- wandb.log({f"{name}_metrics": metrics})
310
- except Exception as e:
311
- print(f"[WARN] WandB log failed for {name}: {e}")
312
-
313
- result_df = pd.DataFrame(results)
314
- best = result_df.sort_values("F1 Score", ascending=False).iloc[0]
315
-
316
- summary = f"""
317
- πŸ” <b>Best Model:</b> {best['Model']}<br>
318
- ✅ <b>F1 Score:</b> {best['F1 Score']:.4f}<br>
319
- 📊 <b>Accuracy:</b> {best['Accuracy']:.4f}<br>
320
- 🧠 <b>Precision:</b> {best['Precision']:.4f}<br>
321
- πŸ” <b>Recall:</b> {best['Recall']:.4f}
322
- """
323
- return summary, result_df
324
-
325
- except Exception as err:
326
- return f"❌ Error during A/B testing:<br>{err}", pd.DataFrame()
327
-
328
 
329
  def explainability(_):
330
- global df_global
331
  import warnings
332
  warnings.filterwarnings("ignore")
333
- if df_global is None:
334
- return None, None
335
 
336
  target = df_global.columns[-1]
337
  X = df_global.drop(target, axis=1)
@@ -407,40 +331,29 @@ def explainability(_):
407
  with gr.Blocks() as demo:
408
  gr.Markdown("## 📊 AI-Powered Data Analysis with Hyperparameter Optimization")
409
 
410
- # ─────────────────────── Upload & Preview ───────────────────────
411
  with gr.Row():
412
  with gr.Column():
413
  file_input = gr.File(label="Upload CSV or Excel", type="filepath")
414
- df_output = gr.DataFrame(label="Cleaned Data Preview")
415
- file_input.change(fn=upload_file, inputs=[file_input], outputs=[dff := df_output])
416
 
417
  with gr.Column():
418
- insights_output = gr.HTML(label="Insights")
419
- #visual_output = gr.Gallery(columns=2, label="Visuals")
420
- agent_btn = gr.Button("Run AI Agent")
421
 
422
- # ─────────────────── Hyperopt + Trials ───────────────────
423
  with gr.Row():
424
- train_btn = gr.Button("Train Model")
425
- metrics_output = gr.JSON(label="Metrics")
426
- trials_output = gr.DataFrame(label="Top Trials")
427
 
428
- # ─────────────────── Explainability ───────────────────
429
  with gr.Row():
430
  explain_btn = gr.Button("SHAP + LIME Explainability")
431
- shap_img = gr.Image(label="SHAP Summary")
432
- lime_img = gr.Image(label="LIME Explanation")
433
-
434
- # ─────────────────── A/B Testing ───────────────────
435
- with gr.Row():
436
- ab_test_button = gr.Button("Run A/B Testing")
437
- ab_summary = gr.HTML(label="A/B Test Summary")
438
- ab_results = gr.DataFrame(label="A/B Test Results")
439
 
440
- # ─────────────────── Hook callbacks ───────────────────
441
- # agent_btn.click(fn=analyze_data, inputs=[file_input], outputs=[insights_output, visual_output])
442
- # train_btn.click(fn=train_model, inputs=[file_input], outputs=[metrics_output, trials_output])
443
- # explain_btn.click(fn=explainability, inputs=[], outputs=[shap_img, lime_img])
444
- ab_test_button.click(fn=ab_test_models, inputs=[], outputs=[ab_summary, ab_results])
445
 
446
- demo.launch(debug=True)
 
40
  df = df.fillna(df.mean(numeric_only=True))
41
  return df
42
 
43
+ def upload_file(file):
 
 
 
 
44
  global df_global
45
+ if file is None:
46
+ return pd.DataFrame({"Error": ["No file uploaded."]})
47
+ ext = os.path.splitext(file.name)[-1]
48
+ df = pd.read_csv(file.name) if ext == ".csv" else pd.read_excel(file.name)
49
+ df = clean_data(df)
50
+ df_global = df
51
+ return df.head()
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  def format_analysis_report(raw_output, visuals):
54
  try:
 
252
  print(f"Training Error: {e}")
253
  return {}, pd.DataFrame()
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
  def explainability(_):
 
257
  import warnings
258
  warnings.filterwarnings("ignore")
 
 
259
 
260
  target = df_global.columns[-1]
261
  X = df_global.drop(target, axis=1)
 
331
  with gr.Blocks() as demo:
332
  gr.Markdown("## 📊 AI-Powered Data Analysis with Hyperparameter Optimization")
333
 
 
334
  with gr.Row():
335
  with gr.Column():
336
  file_input = gr.File(label="Upload CSV or Excel", type="filepath")
337
+ df_output = gr.DataFrame(label="Cleaned Data Preview")
338
+ file_input.change(fn=upload_file, inputs=file_input, outputs=df_output)
339
 
340
  with gr.Column():
341
+ insights_output = gr.HTML(label="Insights from SmolAgent")
342
+ visual_output = gr.Gallery(label="Visualizations (Auto-generated by Agent)", columns=2)
343
+ agent_btn = gr.Button("Run AI Agent (5 Insights + 5 Visualizations)")
344
 
 
345
  with gr.Row():
346
+ train_btn = gr.Button("Train Model with Optuna + WandB")
347
+ metrics_output = gr.JSON(label="Performance Metrics")
348
+ trials_output = gr.DataFrame(label="Top 7 Hyperparameter Trials")
349
 
 
350
  with gr.Row():
351
  explain_btn = gr.Button("SHAP + LIME Explainability")
352
+ shap_img = gr.Image(label="SHAP Summary Plot")
353
+ lime_img = gr.Image(label="LIME Explanation")
 
 
 
 
 
 
354
 
355
+ agent_btn.click(fn=analyze_data, inputs=[file_input], outputs=[insights_output, visual_output])
356
+ train_btn.click(fn=train_model, inputs=[file_input], outputs=[metrics_output, trials_output])
357
+ explain_btn.click(fn=explainability, inputs=[], outputs=[shap_img, lime_img])
 
 
358
 
359
+ demo.launch(debug=True)