Update app.py
Browse files
app.py
CHANGED
|
@@ -17,6 +17,7 @@ from huggingface_hub import login
|
|
| 17 |
from sklearn.ensemble import RandomForestClassifier
|
| 18 |
from sklearn.model_selection import train_test_split, cross_val_score
|
| 19 |
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
|
|
|
| 20 |
from sklearn.preprocessing import LabelEncoder
|
| 21 |
from PIL import Image
|
| 22 |
|
|
@@ -151,6 +152,35 @@ def analyze_data(csv_file, additional_notes=""):
|
|
| 151 |
run.finish()
|
| 152 |
return format_analysis_report(analysis_result, visuals)
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
|
| 156 |
|
|
@@ -171,6 +201,61 @@ def train_model(_):
|
|
| 171 |
|
| 172 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
def objective(trial):
|
| 175 |
params = {
|
| 176 |
"n_estimators": trial.suggest_int("n_estimators", 50, 200),
|
|
|
|
| 17 |
from PIL import Image
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
|
| 23 |
|
|
|
|
| 152 |
run.finish()
|
| 153 |
return format_analysis_report(analysis_result, visuals)
|
| 154 |
|
| 155 |
+
def compare_models(cv=5):
    """Cross-validate a fixed set of classifiers on the globally loaded dataset.

    Treats the last column of the module-level ``df_global`` as the target,
    label-encodes it when it is an object dtype, and scores RandomForest,
    LogisticRegression and SVC with k-fold cross-validation.

    Args:
        cv: Number of cross-validation folds (default 5, matching the
            original hard-coded value).

    Returns:
        A pandas DataFrame with one row per model (columns: "Model",
        "CV Mean Accuracy", "CV Std Dev"), or a user-facing string when no
        dataset has been loaded yet.
    """
    if df_global is None:
        return "Please upload and preprocess a dataset first."

    # Convention in this app: the last column is the prediction target.
    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    # String labels must be encoded for the sklearn estimators.
    # NOTE(review): pandas "category" dtype targets would slip past this
    # check — confirm upstream preprocessing never produces them.
    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    models = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "SVC": SVC(),
    }

    results = []
    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=cv)
        # Compute the aggregates once; the original recomputed mean/std
        # separately for the results row and the wandb payload.
        mean_score = np.mean(scores)
        std_score = np.std(scores)
        results.append({
            "Model": name,
            "CV Mean Accuracy": mean_score,
            "CV Std Dev": std_score,
        })
        # NOTE(review): assumes an active wandb run — confirm a run is
        # started before this handler fires from the UI.
        wandb.log({f"{name}_cv_mean": mean_score, f"{name}_cv_std": std_score})

    return pd.DataFrame(results)
|
| 184 |
|
| 185 |
|
| 186 |
|
|
|
|
| 201 |
|
| 202 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
|
| 203 |
|
| 204 |
+
# Error analysis
# Build a per-row error table on the held-out test split so that the most
# frequent (actual, predicted) confusion pairs can be reported and logged.
# NOTE(review): in this hunk these lines are inserted directly after
# train_test_split and before any model is fit, yet they read `y_pred`
# (and the later confusion-matrix code reads `best_model`) — confirm
# `y_pred` is assigned before this point in the final file, otherwise this
# raises NameError at runtime.
error_df = X_test.copy()
error_df["actual"] = y_test
error_df["predicted"] = y_pred
# Boolean mask: True on rows the model got wrong.
error_df["error"] = error_df["actual"] != error_df["predicted"]
# Count misclassifications per (actual, predicted) pair; `count` holds the
# frequency of each confusion.
common_errors = error_df[error_df["error"]].groupby(["actual", "predicted"]).size().reset_index(name='count')
|
| 210 |
+
|
| 211 |
+
def generate_report(metrics_df, trials_df, common_errors_df):
    """Write a markdown training report to ``model_report.md``.

    Renders three tables (metrics, top Optuna trials, common
    misclassifications) via ``DataFrame.to_markdown`` plus a generation
    timestamp, and saves the result in the working directory.

    Args:
        metrics_df: DataFrame of final model metrics.
        trials_df: DataFrame of the top hyperparameter-search trials.
        common_errors_df: DataFrame of frequent (actual, predicted) errors.

    Returns:
        A user-facing status string naming the saved file.
    """
    report = f"""
# Model Training Report

## Metrics
{metrics_df.to_markdown(index=False)}

## Top Trials
{trials_df.to_markdown(index=False)}

## Common Errors
{common_errors_df.to_markdown(index=False)}

_Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}_
"""
    # Explicit utf-8: the default platform encoding (e.g. cp1252 on
    # Windows) can fail on non-ASCII values coming from the dataset.
    with open("model_report.md", "w", encoding="utf-8") as f:
        f.write(report)
    return "Report saved to model_report.md"
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
# Plot and log the confusion matrix for the tuned model.
# NOTE(review): `best_model` is not defined anywhere in this hunk — the
# surrounding code (inserted before `objective` is even defined) suggests
# this runs before training; confirm `best_model` exists at this point.
fig, ax = plt.subplots(figsize=(6, 4))
ConfusionMatrixDisplay.from_estimator(best_model, X_test, y_test, ax=ax)
plt.savefig("confusion_matrix.png")
wandb.log({"confusion_matrix": wandb.Image("confusion_matrix.png")})


# Inside your layout:
# NOTE(review): the comment above reads like pasted snippet instructions —
# these Gradio widgets must be created inside the app's gr.Blocks/layout
# context, not in the middle of the training function; verify placement.
compare_button = gr.Button("Compare Models")
compare_output = gr.Dataframe()

compare_button.click(fn=compare_models, outputs=compare_output)

report_button = gr.Button("Generate Report")
report_status = gr.Textbox()

# NOTE(review): the lambda captures `metrics_df` and `trials_df`, neither
# of which is defined in this hunk — confirm they exist in the scope where
# this handler is registered, otherwise clicking raises NameError.
report_button.click(
    fn=lambda: generate_report(metrics_df, trials_df, common_errors),
    outputs=report_status
)


# Log common misclassifications to wandb
wandb.log({"common_errors": wandb.Table(dataframe=common_errors)})
|
| 257 |
+
|
| 258 |
+
|
| 259 |
def objective(trial):
|
| 260 |
params = {
|
| 261 |
"n_estimators": trial.suggest_int("n_estimators", 50, 200),
|