Spaces:

shimaa22
/

analysis_web

Sleeping

App Files Files Community

shimaa22 committed 29 days ago

Commit

2967f48

verified ·

1 Parent(s): 060f18c

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -47

app.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# app.py
 import gradio as gr
 import pandas as pd
 import numpy as np
@@ -26,20 +24,20 @@ from sklearn.metrics import (
     r2_score
 )
-from reportlab.lib.pagesizes import letter
 from reportlab.pdfgen import canvas
 # =========================
 # GLOBAL
 # =========================
 df_global = None
-best_model_global = None
 best_model_obj = None
 X_global = None
 y_global = None
 # =========================
-# UPLOAD
 # =========================
 def upload_and_clean(file):
@@ -54,40 +52,70 @@ def upload_and_clean(file):
         else:
             df[col] = df[col].fillna(df[col].mode()[0])
-    df_global = df.copy()
-    return "Data Loaded", df.head(), gr.update(choices=list(df.columns)), gr.update(choices=list(df.columns))
 # =========================
-# FEATURE IMPORTANCE
 # =========================
-def feature_importance_plot(model, X, title):
-    if hasattr(model, "feature_importances_"):
-        imp = model.feature_importances_
-        plt.figure(figsize=(6,4))
-        plt.barh(X.columns, imp)
-        plt.title("Feature Importance")
-        path = "/tmp/feature_importance.png"
         plt.savefig(path)
         plt.close()
-        return path
-    return None
 # =========================
-# ML
 # =========================
 def run_ml(target):
-    global df_global, best_model_global, best_model_obj, X_global, y_global
     df = df_global.copy()
     for col in df.columns:
         if not pd.api.types.is_numeric_dtype(df[col]):
             df[col] = LabelEncoder().fit_transform(df[col].astype(str))
@@ -100,11 +128,14 @@ def run_ml(target):
     is_classification = len(np.unique(y)) <= 20
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
     results = []
-    best_score = 0
     if is_classification:
         models = {
@@ -129,13 +160,29 @@ def run_ml(target):
             if acc > best_score:
                 best_score = acc
-                best_model_global = name
                 best_model_obj = model
         leaderboard = pd.DataFrame(results).sort_values("Accuracy", ascending=False)
-        return "Classification", leaderboard
     else:
         models = {
@@ -158,42 +205,65 @@ def run_ml(target):
         leaderboard = pd.DataFrame(results).sort_values("R2", ascending=False)
-        best_model_global = leaderboard.iloc[0]["Model"]
-        return "Regression", leaderboard
 # =========================
-# FEATURE IMPORTANCE OUTPUT
 # =========================
-def show_feature_importance():
     global best_model_obj, X_global
-    if best_model_obj is None:
-        return None
-    return feature_importance_plot(best_model_obj, X_global, "Feature Importance")
 # =========================
 # PDF REPORT
 # =========================
-def download_report():
-    global best_model_global
     file_path = "/tmp/report.pdf"
-    c = canvas.Canvas(file_path, pagesize=letter)
     c.drawString(100, 750, "Auto ML Report")
-    c.drawString(100, 730, f"Best Model: {best_model_global}")
-    c.drawString(100, 700, "Generated by Auto ML System")
     c.save()
     return file_path
 # =========================
 # UI
 # =========================
@@ -210,17 +280,22 @@ with gr.Blocks() as demo:
     target = gr.Dropdown(label="Target")
-    run_btn = gr.Button("Run ML")
     ml_status = gr.Textbox()
     leaderboard = gr.Dataframe()
-    cm = gr.Image()
-    feature_btn = gr.Button("Show Feature Importance")
-    feature_img = gr.Image()
-    pdf_btn = gr.Button("Download Report PDF")
     pdf_file = gr.File()
     # upload
@@ -230,23 +305,23 @@ with gr.Blocks() as demo:
         [status, preview, target, target]
     )
-    # ML
     run_btn.click(
-        run_ml,
         target,
-        [ml_status, leaderboard]
     )
     # feature importance
-    feature_btn.click(
-        show_feature_importance,
         None,
-        feature_img
     )
     # pdf
     pdf_btn.click(
-        download_report,
         None,
         pdf_file
     )

 import gradio as gr
 import pandas as pd
 import numpy as np
     r2_score
 )
 from reportlab.pdfgen import canvas
 # =========================
 # GLOBAL
 # =========================
 df_global = None
 best_model_obj = None
+best_model_name = None
 X_global = None
 y_global = None
 # =========================
+# UPLOAD + CLEAN
 # =========================
 def upload_and_clean(file):
         else:
             df[col] = df[col].fillna(df[col].mode()[0])
+    df_global = df
+    return (
+        "Data Loaded Successfully",
+        df.head(),
+        gr.update(choices=list(df.columns)),
+        gr.update(choices=list(df.columns))
+    )
 # =========================
+# VISUALIZATION (BAR + PIE)
 # =========================
+def analyze_data(target):
+    global df_global
+    df = df_global.copy()
+    images = []
+    cols = [c for c in df.columns if c != target]
+    for col in cols[:8]:
+        fig, axes = plt.subplots(1, 2, figsize=(12, 4))
+        # BAR
+        df[col].astype(str).value_counts().head(10).plot(
+            kind="bar",
+            ax=axes[0]
+        )
+        axes[0].set_title(f"Bar - {col}")
+        axes[0].tick_params(axis='x', rotation=45)
+        # PIE
+        df[col].astype(str).value_counts().head(6).plot(
+            kind="pie",
+            ax=axes[1],
+            autopct="%1.1f%%"
+        )
+        axes[1].set_title(f"Pie - {col}")
+        axes[1].set_ylabel("")
+        plt.tight_layout()
+        path = f"/tmp/{col}.png"
         plt.savefig(path)
         plt.close()
+        images.append(path)
+    return images
 # =========================
+# ML TRAINING
 # =========================
 def run_ml(target):
+    global df_global, best_model_obj, best_model_name, X_global, y_global
     df = df_global.copy()
+    # encode all categorical
     for col in df.columns:
         if not pd.api.types.is_numeric_dtype(df[col]):
             df[col] = LabelEncoder().fit_transform(df[col].astype(str))
     is_classification = len(np.unique(y)) <= 20
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
     results = []
+    best_score = -999
+    # ================= CLASSIFICATION =================
     if is_classification:
         models = {
             if acc > best_score:
                 best_score = acc
                 best_model_obj = model
+                best_model_name = name
         leaderboard = pd.DataFrame(results).sort_values("Accuracy", ascending=False)
+        # confusion matrix
+        cm = confusion_matrix(y_test, best_model_obj.predict(X_test))
+        fig = plt.figure()
+        plt.imshow(cm, cmap="Blues")
+        plt.title(f"Best Model: {best_model_name}")
+        for i in range(cm.shape[0]):
+            for j in range(cm.shape[1]):
+                plt.text(j, i, cm[i, j], ha="center", va="center")
+        cm_path = "/tmp/cm.png"
+        plt.savefig(cm_path)
+        plt.close()
+        return "Classification", leaderboard, cm_path
+    # ================= REGRESSION =================
     else:
         models = {
         leaderboard = pd.DataFrame(results).sort_values("R2", ascending=False)
+        best_model_name = leaderboard.iloc[0]["Model"]
+        return "Regression", leaderboard, None
 # =========================
+# FEATURE IMPORTANCE
 # =========================
+def feature_importance():
     global best_model_obj, X_global
+    if hasattr(best_model_obj, "feature_importances_"):
+        plt.figure(figsize=(6,4))
+        plt.barh(X_global.columns, best_model_obj.feature_importances_)
+        path = "/tmp/feature.png"
+        plt.savefig(path)
+        plt.close()
+        return path
+    return None
 # =========================
 # PDF REPORT
 # =========================
+def download_pdf():
+    global best_model_name
     file_path = "/tmp/report.pdf"
+    c = canvas.Canvas(file_path)
     c.drawString(100, 750, "Auto ML Report")
+    c.drawString(100, 730, f"Best Model: {best_model_name}")
+    c.drawString(100, 700, "Generated Successfully")
     c.save()
     return file_path
+# =========================
+# COMBINED RUN
+# =========================
+def full_run(target):
+    status, leaderboard, cm = run_ml(target)
+    images = analyze_data(target)
+    return status, leaderboard, cm, images
 # =========================
 # UI
 # =========================
     target = gr.Dropdown(label="Target")
+    run_btn = gr.Button("RUN FULL ANALYSIS")
     ml_status = gr.Textbox()
     leaderboard = gr.Dataframe()
+    cm_img = gr.Image()
+    gallery = gr.Gallery(
+        label="Analysis Charts (Click to Enlarge)",
+        columns=2
+    )
+    feat_btn = gr.Button("Feature Importance")
+    feat_img = gr.Image()
+    pdf_btn = gr.Button("Download Report")
     pdf_file = gr.File()
     # upload
         [status, preview, target, target]
     )
+    # full analysis
     run_btn.click(
+        full_run,
         target,
+        [ml_status, leaderboard, cm_img, gallery]
     )
     # feature importance
+    feat_btn.click(
+        feature_importance,
         None,
+        feat_img
     )
     # pdf
     pdf_btn.click(
+        download_pdf,
         None,
         pdf_file
     )