AIDataAgentProjectFinal

Paused

App Files Community

pavanmutha committed on Apr 13, 2025

Commit

4949145

verified ·

1 Parent(s): e7399a2

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -59

app.py CHANGED Viewed

@@ -182,9 +182,6 @@ def compare_models():
     results_df = pd.DataFrame(results)
     return results_df
 def train_model(_):
     wandb.login(key=os.environ.get("WANDB_API_KEY"))
     run_counter = 1
@@ -209,34 +206,29 @@ def train_model(_):
     common_errors = error_df[error_df["error"]].groupby(["actual", "predicted"]).size().reset_index(name='count')
     def generate_report(metrics_df, trials_df, common_errors_df):
-    report = f"""
-    # Model Training Report
-    ## Metrics
-    {metrics_df.to_markdown(index=False)}
-    ## Top Trials
-    {trials_df.to_markdown(index=False)}
-    ## Common Errors
-    {common_errors_df.to_markdown(index=False)}
-    _Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}_
-    """
-    with open("model_report.md", "w") as f:
-        f.write(report)
-    return "Report saved to model_report.md"
 fig, ax = plt.subplots(figsize=(6, 4))
 ConfusionMatrixDisplay.from_estimator(best_model, X_test, y_test, ax=ax)
 plt.savefig("confusion_matrix.png")
 wandb.log({"confusion_matrix": wandb.Image("confusion_matrix.png")})
 # Inside your layout:
 compare_button = gr.Button("Compare Models")
 compare_output = gr.Dataframe()
@@ -251,40 +243,38 @@ report_button.click(
     outputs=report_status
 )
 # Log common misclassifications to wandb
 wandb.log({"common_errors": wandb.Table(dataframe=common_errors)})
-    def objective(trial):
-        params = {
-            "n_estimators": trial.suggest_int("n_estimators", 50, 200),
-            "max_depth": trial.suggest_int("max_depth", 3, 10),
-        }
-        model = RandomForestClassifier(**params)
-        score = cross_val_score(model, X_train, y_train, cv=3).mean()
-        wandb.log(params | {"cv_score": score})
-        return score
-    study = optuna.create_study(direction="maximize")
-    study.optimize(objective, n_trials=15)
-    best_params = study.best_params
-    model = RandomForestClassifier(**best_params)
-    model.fit(X_train, y_train)
-    y_pred = model.predict(X_test)
-    metrics = {
-        "accuracy": accuracy_score(y_test, y_pred),
-        "precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
-        "recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
-        "f1_score": f1_score(y_test, y_pred, average="weighted", zero_division=0),
     }
-    wandb.log(metrics)
-    wandb_run.finish()
-    top_trials = pd.DataFrame(study.trials_dataframe().sort_values(by="value", ascending=False).head(7))
-    return metrics, top_trials
 def explainability(_):
     import warnings
@@ -361,9 +351,6 @@ def explainability(_):
     return shap_path, lime_path
 with gr.Blocks() as demo:
     gr.Markdown("## 📊 AI-Powered Data Analysis with Hyperparameter Optimization")
@@ -374,7 +361,6 @@ with gr.Blocks() as demo:
             file_input.change(fn=upload_file, inputs=file_input, outputs=df_output)
         with gr.Column():
             insights_output = gr.HTML(label="Insights from SmolAgent")
             visual_output = gr.Gallery(label="Visualizations (Auto-generated by Agent)", columns=2)
             agent_btn = gr.Button("Run AI Agent (5 Insights + 5 Visualizations)")
@@ -389,11 +375,8 @@ with gr.Blocks() as demo:
         shap_img = gr.Image(label="SHAP Summary Plot")
         lime_img = gr.Image(label="LIME Explanation")
     agent_btn.click(fn=analyze_data, inputs=[file_input], outputs=[insights_output, visual_output])
     train_btn.click(fn=train_model, inputs=[], outputs=[metrics_output, trials_output])
     explain_btn.click(fn=explainability, inputs=[], outputs=[shap_img, lime_img])
-demo.launch(debug=True)

     results_df = pd.DataFrame(results)
     return results_df
 def train_model(_):
     wandb.login(key=os.environ.get("WANDB_API_KEY"))
     run_counter = 1
     common_errors = error_df[error_df["error"]].groupby(["actual", "predicted"]).size().reset_index(name='count')
     def generate_report(metrics_df, trials_df, common_errors_df):
+        report = f"""
+        # Model Training Report
+        ## Metrics
+        {metrics_df.to_markdown(index=False)}
+        ## Top Trials
+        {trials_df.to_markdown(index=False)}
+        ## Common Errors
+        {common_errors_df.to_markdown(index=False)}
+        _Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}_
+        """
+        with open("model_report.md", "w") as f:
+            f.write(report)
+        return "Report saved to model_report.md"
 fig, ax = plt.subplots(figsize=(6, 4))
 ConfusionMatrixDisplay.from_estimator(best_model, X_test, y_test, ax=ax)
 plt.savefig("confusion_matrix.png")
 wandb.log({"confusion_matrix": wandb.Image("confusion_matrix.png")})
 # Inside your layout:
 compare_button = gr.Button("Compare Models")
 compare_output = gr.Dataframe()
     outputs=report_status
 )
 # Log common misclassifications to wandb
 wandb.log({"common_errors": wandb.Table(dataframe=common_errors)})
+def objective(trial):
+    params = {
+        "n_estimators": trial.suggest_int("n_estimators", 50, 200),
+        "max_depth": trial.suggest_int("max_depth", 3, 10),
     }
+    model = RandomForestClassifier(**params)
+    score = cross_val_score(model, X_train, y_train, cv=3).mean()
+    wandb.log(params | {"cv_score": score})
+    return score
+study = optuna.create_study(direction="maximize")
+study.optimize(objective, n_trials=15)
+best_params = study.best_params
+model = RandomForestClassifier(**best_params)
+model.fit(X_train, y_train)
+y_pred = model.predict(X_test)
+metrics = {
+    "accuracy": accuracy_score(y_test, y_pred),
+    "precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
+    "recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
+    "f1_score": f1_score(y_test, y_pred, average="weighted", zero_division=0),
+}
+wandb.log(metrics)
+wandb_run.finish()
+top_trials = pd.DataFrame(study.trials_dataframe().sort_values(by="value", ascending=False).head(7))
+return metrics, top_trials
 def explainability(_):
     import warnings
     return shap_path, lime_path
 with gr.Blocks() as demo:
     gr.Markdown("## 📊 AI-Powered Data Analysis with Hyperparameter Optimization")
             file_input.change(fn=upload_file, inputs=file_input, outputs=df_output)
         with gr.Column():
             insights_output = gr.HTML(label="Insights from SmolAgent")
             visual_output = gr.Gallery(label="Visualizations (Auto-generated by Agent)", columns=2)
             agent_btn = gr.Button("Run AI Agent (5 Insights + 5 Visualizations)")
         shap_img = gr.Image(label="SHAP Summary Plot")
         lime_img = gr.Image(label="LIME Explanation")
     agent_btn.click(fn=analyze_data, inputs=[file_input], outputs=[insights_output, visual_output])
     train_btn.click(fn=train_model, inputs=[], outputs=[metrics_output, trials_output])
     explain_btn.click(fn=explainability, inputs=[], outputs=[shap_img, lime_img])
+demo.launch(debug=True)