Spaces:

shimaa22
/

analysis_web

Sleeping

App Files Community

shimaa22 committed 20 days ago

Commit

d4b1c85

verified ·

1 Parent(s): f388de4

Update app.py

Browse files

Files changed (1) hide show

app.py +148 -98

app.py CHANGED Viewed

@@ -14,25 +14,31 @@ from sklearn.metrics import (
     accuracy_score,
     precision_score,
     recall_score,
     confusion_matrix
 )
 from imblearn.over_sampling import SMOTE
-from reportlab.pdfgen import canvas
 # =========================
-# GLOBAL
 # =========================
 df_global = None
-best_model_obj = None
 best_model_name = None
-X_global = None
-y_global = None
 # =========================
-# UPLOAD + CLEAN
 # =========================
 def upload_and_clean(file):
@@ -58,12 +64,10 @@ def upload_and_clean(file):
 # =========================
-# VISUALIZATION
 # =========================
 def analyze_data(target):
-    global df_global
     df = df_global.copy()
     images = []
@@ -100,11 +104,35 @@ def analyze_data(target):
 # =========================
-# ML WITH SMOTE + CLASS WEIGHT
 # =========================
 def run_ml(target):
-    global df_global, best_model_obj, best_model_name, X_global, y_global
     df = df_global.copy()
@@ -116,39 +144,28 @@ def run_ml(target):
     X = df.drop(columns=[target])
     y = df[target]
-    X_global = X
-    y_global = y
-    # =========================
-    # imbalance detection
-    # =========================
-    counts = np.bincount(y)
-    imbalance_ratio = min(counts) / max(counts)
-    is_imbalanced = imbalance_ratio < 0.5
-    # split
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, test_size=0.2, random_state=42
     )
     models = {
         "Decision Tree": DecisionTreeClassifier(),
         "Random Forest": RandomForestClassifier(),
         "XGBoost": XGBClassifier(eval_metric="logloss")
     }
-    # =========================
-    # RESULT TABLES
-    # =========================
-    no_results = []
-    cw_results = []
-    smote_results = []
     best_score = 0
-    # =====================================================
-    # 1️⃣ NO SAMPLING
-    # =====================================================
     for name, model in models.items():
         model.fit(X_train, y_train)
@@ -156,36 +173,49 @@ def run_ml(target):
         acc = accuracy_score(y_test, pred)
-        no_results.append({
             "Model": name,
-            "Accuracy": acc
         })
         if acc > best_score:
             best_score = acc
-            best_model_obj = model
-            best_model_name = name + " (No Sampling)"
-    # =====================================================
-    # 2️⃣ CLASS WEIGHT
-    # =====================================================
-    for name, model in models.items():
-        if name != "XGBoost":
-            model = DecisionTreeClassifier(class_weight="balanced") if name=="Decision Tree" else RandomForestClassifier(class_weight="balanced")
         model.fit(X_train, y_train)
         pred = model.predict(X_test)
-        cw_results.append({
             "Model": name,
-            "Accuracy": accuracy_score(y_test, pred)
         })
-    # =====================================================
-    # 3️⃣ SMOTE
-    # =====================================================
-    if is_imbalanced:
         sm = SMOTE(random_state=42)
         X_res, y_res = sm.fit_resample(X_train, y_train)
     else:
@@ -196,16 +226,28 @@ def run_ml(target):
         model.fit(X_res, y_res)
         pred = model.predict(X_test)
-        smote_results.append({
             "Model": name,
-            "Accuracy": accuracy_score(y_test, pred)
         })
     return (
-        f"Imbalanced Dataset: {is_imbalanced}",
-        pd.DataFrame(no_results),
-        pd.DataFrame(cw_results),
-        pd.DataFrame(smote_results)
     )
@@ -214,12 +256,13 @@ def run_ml(target):
 # =========================
 def feature_importance():
-    global best_model_obj, X_global
     if hasattr(best_model_obj, "feature_importances_"):
         plt.figure(figsize=(6,4))
-        plt.barh(X_global.columns, best_model_obj.feature_importances_)
         path = "/tmp/feat.png"
         plt.savefig(path)
@@ -233,33 +276,58 @@ def feature_importance():
 # =========================
 # PDF REPORT
 # =========================
-def download_report():
-    global best_model_name
     path = "/tmp/report.pdf"
-    c = canvas.Canvas(path)
-    c.drawString(100, 750, "Auto ML Report")
-    c.drawString(100, 730, f"Best Model: {best_model_name}")
-    c.drawString(100, 700, "Includes SMOTE + Class Weight Comparison")
-    c.save()
     return path
 # =========================
-# FULL ANALYSIS
 # =========================
 def full_analysis(target):
-    ml_status, no_df, cw_df, smote_df = run_ml(target)
-    images = analyze_data(target)
-    return ml_status, no_df, cw_df, smote_df, images
 # =========================
@@ -267,59 +335,41 @@ def full_analysis(target):
 # =========================
 with gr.Blocks() as demo:
-    gr.Markdown("# 🚀 Advanced AutoML System (SMOTE + Class Weight)")
     file = gr.File()
-    upload_btn = gr.Button("Upload Data")
     status = gr.Textbox()
     preview = gr.Dataframe()
-    target = gr.Dropdown(label="Select Target")
     run_btn = gr.Button("Run Full Analysis")
     ml_status = gr.Textbox()
-    no_table = gr.Dataframe(label="No Sampling")
-    cw_table = gr.Dataframe(label="Class Weight")
-    smote_table = gr.Dataframe(label="SMOTE")
-    gallery = gr.Gallery(label="Visualizations", columns=2)
     feat_btn = gr.Button("Feature Importance")
     feat_img = gr.Image()
-    pdf_btn = gr.Button("Download Report")
     pdf_file = gr.File()
-    # upload
-    upload_btn.click(
-        upload_and_clean,
-        file,
-        [status, preview, target, target]
-    )
-    # full analysis
-    run_btn.click(
-        full_analysis,
-        target,
-        [ml_status, no_table, cw_table, smote_table, gallery]
-    )
-    # feature importance
-    feat_btn.click(
-        feature_importance,
-        None,
-        feat_img
-    )
-    # pdf
-    pdf_btn.click(
-        download_report,
-        None,
-        pdf_file
-    )
 demo.launch(share=True)

     accuracy_score,
     precision_score,
     recall_score,
+    f1_score,
     confusion_matrix
 )
 from imblearn.over_sampling import SMOTE
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle
+from reportlab.lib import colors
+from reportlab.lib.styles import getSampleStyleSheet
 # =========================
+# GLOBALS
 # =========================
 df_global = None
 best_model_name = None
+best_model_obj = None
+no_global = None
+cw_global = None
+smote_global = None
+cm_global = None
 # =========================
+# UPLOAD
 # =========================
 def upload_and_clean(file):
 # =========================
+# ANALYSIS VISUALIZATION
 # =========================
 def analyze_data(target):
     df = df_global.copy()
     images = []
 # =========================
+# CONFUSION MATRIX
+# =========================
+def plot_cm(y_true, y_pred, title):
+    cm = confusion_matrix(y_true, y_pred)
+    plt.figure(figsize=(4,4))
+    plt.imshow(cm, cmap="Blues")
+    plt.title(title)
+    for i in range(cm.shape[0]):
+        for j in range(cm.shape[1]):
+            plt.text(j, i, cm[i, j], ha="center", va="center")
+    path = f"/tmp/{title}.png"
+    plt.savefig(path)
+    plt.close()
+    return path
+# =========================
+# ML (NO / CW / SMOTE)
 # =========================
 def run_ml(target):
+    global df_global, best_model_name
+    global no_global, cw_global, smote_global, cm_global
     df = df_global.copy()
     X = df.drop(columns=[target])
     y = df[target]
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, test_size=0.2, random_state=42
     )
+    # imbalance check
+    counts = np.bincount(y)
+    imbalance = min(counts) / max(counts) < 0.5
     models = {
         "Decision Tree": DecisionTreeClassifier(),
         "Random Forest": RandomForestClassifier(),
         "XGBoost": XGBClassifier(eval_metric="logloss")
     }
+    no_rows, cw_rows, smote_rows = [], [], []
+    cm_images = {}
     best_score = 0
+    # =========================
+    # NO SAMPLING
+    # =========================
     for name, model in models.items():
         model.fit(X_train, y_train)
         acc = accuracy_score(y_test, pred)
+        no_rows.append({
             "Model": name,
+            "Accuracy": acc,
+            "Precision": precision_score(y_test, pred, average="weighted", zero_division=0),
+            "Recall": recall_score(y_test, pred, average="weighted", zero_division=0),
+            "F1": f1_score(y_test, pred, average="weighted", zero_division=0)
         })
+        cm_images[f"{name}_no"] = plot_cm(y_test, pred, f"{name}_NO")
         if acc > best_score:
             best_score = acc
+            best_model_name = name + " (No)"
+    # =========================
+    # CLASS WEIGHT
+    # =========================
+    for name in models.keys():
+        if name == "Decision Tree":
+            model = DecisionTreeClassifier(class_weight="balanced")
+        elif name == "Random Forest":
+            model = RandomForestClassifier(class_weight="balanced")
+        else:
+            model = XGBClassifier(eval_metric="logloss")
         model.fit(X_train, y_train)
         pred = model.predict(X_test)
+        cw_rows.append({
             "Model": name,
+            "Accuracy": accuracy_score(y_test, pred),
+            "Precision": precision_score(y_test, pred, average="weighted", zero_division=0),
+            "Recall": recall_score(y_test, pred, average="weighted", zero_division=0),
+            "F1": f1_score(y_test, pred, average="weighted", zero_division=0)
         })
+        cm_images[f"{name}_cw"] = plot_cm(y_test, pred, f"{name}_CW")
+    # =========================
+    # SMOTE
+    # =========================
+    if imbalance:
         sm = SMOTE(random_state=42)
         X_res, y_res = sm.fit_resample(X_train, y_train)
     else:
         model.fit(X_res, y_res)
         pred = model.predict(X_test)
+        smote_rows.append({
             "Model": name,
+            "Accuracy": accuracy_score(y_test, pred),
+            "Precision": precision_score(y_test, pred, average="weighted", zero_division=0),
+            "Recall": recall_score(y_test, pred, average="weighted", zero_division=0),
+            "F1": f1_score(y_test, pred, average="weighted", zero_division=0)
         })
+        cm_images[f"{name}_smote"] = plot_cm(y_test, pred, f"{name}_SMOTE")
+    # store globally
+    no_global = pd.DataFrame(no_rows)
+    cw_global = pd.DataFrame(cw_rows)
+    smote_global = pd.DataFrame(smote_rows)
+    cm_global = cm_images
     return (
+        f"Imbalance: {imbalance}",
+        no_global,
+        cw_global,
+        smote_global,
+        list(cm_images.values())
     )
 # =========================
 def feature_importance():
+    global best_model_obj
     if hasattr(best_model_obj, "feature_importances_"):
         plt.figure(figsize=(6,4))
+        plt.barh(range(len(best_model_obj.feature_importances_)),
+                 best_model_obj.feature_importances_)
         path = "/tmp/feat.png"
         plt.savefig(path)
 # =========================
 # PDF REPORT
 # =========================
+def generate_pdf():
+    global no_global, cw_global, smote_global, cm_global, best_model_name
     path = "/tmp/report.pdf"
+    doc = SimpleDocTemplate(path)
+    styles = getSampleStyleSheet()
+    elements = []
+    elements.append(Paragraph("AutoML Full Report", styles["Title"]))
+    elements.append(Spacer(1, 10))
+    elements.append(Paragraph(f"Best Model: {best_model_name}", styles["Heading2"]))
+    def add_table(df, title):
+        elements.append(Spacer(1, 10))
+        elements.append(Paragraph(title, styles["Heading3"]))
+        data = [df.columns.tolist()] + df.values.tolist()
+        table = Table(data)
+        table.setStyle(TableStyle([
+            ("BACKGROUND", (0,0), (-1,0), colors.grey),
+            ("TEXTCOLOR", (0,0), (-1,0), colors.white),
+            ("GRID", (0,0), (-1,-1), 0.5, colors.black)
+        ]))
+        elements.append(table)
+    add_table(no_global, "No Sampling")
+    add_table(cw_global, "Class Weight")
+    add_table(smote_global, "SMOTE")
+    elements.append(Spacer(1, 10))
+    elements.append(Paragraph("Confusion Matrices", styles["Heading2"]))
+    for name, img in cm_global.items():
+        elements.append(Paragraph(name, styles["Normal"]))
+        elements.append(Image(img, width=200, height=200))
+    doc.build(elements)
     return path
 # =========================
+# ANALYSIS
 # =========================
 def full_analysis(target):
+    ml_status, no_df, cw_df, smote_df, imgs = run_ml(target)
+    return ml_status, no_df, cw_df, smote_df, imgs
 # =========================
 # =========================
 with gr.Blocks() as demo:
+    gr.Markdown("# 🚀 Advanced AutoML System")
     file = gr.File()
+    upload_btn = gr.Button("Upload")
     status = gr.Textbox()
     preview = gr.Dataframe()
+    target = gr.Dropdown(label="Target")
     run_btn = gr.Button("Run Full Analysis")
     ml_status = gr.Textbox()
+    no_table = gr.Dataframe()
+    cw_table = gr.Dataframe()
+    smote_table = gr.Dataframe()
+    gallery = gr.Gallery(columns=2)
     feat_btn = gr.Button("Feature Importance")
     feat_img = gr.Image()
+    pdf_btn = gr.Button("Download PDF")
     pdf_file = gr.File()
+    upload_btn.click(upload_and_clean, file,
+                      [status, preview, target, target])
+    run_btn.click(full_analysis, target,
+                  [ml_status, no_table, cw_table, smote_table, gallery])
+    feat_btn.click(feature_importance, None, feat_img)
+    pdf_btn.click(generate_pdf, None, pdf_file)
 demo.launch(share=True)