Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import LabelEncoder | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.ensemble import RandomForestClassifier | |
| from xgboost import XGBClassifier | |
| from sklearn.metrics import ( | |
| accuracy_score, | |
| precision_score, | |
| recall_score, | |
| f1_score, | |
| confusion_matrix | |
| ) | |
| from imblearn.over_sampling import SMOTE | |
| from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle | |
| from reportlab.lib import colors | |
| from reportlab.lib.styles import getSampleStyleSheet | |
| # ========================= | |
| # GLOBALS | |
| # ========================= | |
# Cleaned dataset; assigned by upload_and_clean() and copied by
# analyze_data() and run_ml().
df_global = None
# Display name of the selected model; assigned by run_ml() and printed in the
# PDF report.
best_model_name = None
# Estimator object inspected by feature_importance().
best_model_obj = None
# Metric tables (one row per model) built by run_ml() for the three training
# strategies: no sampling, class weight, and SMOTE. Read by generate_pdf().
no_global = None
cw_global = None
smote_global = None
# Mapping of "<model>_<strategy>" keys to confusion-matrix image paths;
# assigned by run_ml() and iterated by generate_pdf().
cm_global = None
| # ========================= | |
| # UPLOAD | |
| # ========================= | |
def upload_and_clean(file):
    """Load an uploaded CSV, clean it, and store it in ``df_global``.

    Cleaning steps: drop duplicate rows, fill missing numeric values with the
    column median, and fill missing non-numeric values with the column mode.

    Args:
        file: The value delivered by the ``gr.File`` component. Either an
            object exposing the path as ``.name`` or a plain path string.

    Returns:
        A 4-tuple ``(status message, first rows of the cleaned frame,
        dropdown update, dropdown update)``. Both dropdown updates carry the
        column names as choices.

    Raises:
        ValueError: If no file was uploaded.
    """
    global df_global

    if file is None:
        raise ValueError("Please upload a CSV file first.")

    # NOTE(review): depending on the Gradio version/config the upload arrives
    # as a path string or as an object with `.name`; accept both.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path).drop_duplicates()

    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col] = df[col].fillna(df[col].median())
        else:
            modes = df[col].mode()
            # An all-NaN column has no mode; leave it untouched rather than
            # crash on an empty lookup.
            if not modes.empty:
                df[col] = df[col].fillna(modes.iloc[0])

    df_global = df
    column_names = list(df.columns)
    return (
        "Data Loaded Successfully",
        df.head(),
        gr.update(choices=column_names),
        gr.update(choices=column_names),
    )
| # ========================= | |
| # ANALYSIS VISUALIZATION | |
| # ========================= | |
def analyze_data(target):
    """Render bar and pie charts for the first six non-target columns.

    Each chart pair is based on the value frequencies of the column after
    casting it to ``str`` (top 10 values for the bar chart, top 6 for the pie
    chart).

    Args:
        target: Name of the target column, which is excluded from the plots.

    Returns:
        List of PNG file paths, one per plotted column.

    Raises:
        ValueError: If no dataset has been uploaded yet.
    """
    import re

    if df_global is None:
        raise ValueError("Upload a dataset before running the analysis.")

    df = df_global.copy()
    images = []
    cols = [c for c in df.columns if c != target]

    for idx, col in enumerate(cols[:6]):
        # Compute the frequency table once and reuse it for both charts.
        counts = df[col].astype(str).value_counts()

        fig, axes = plt.subplots(1, 2, figsize=(12, 4))

        counts.head(10).plot(kind="bar", ax=axes[0])
        axes[0].set_title(f"Bar - {col}")
        axes[0].tick_params(axis='x', rotation=45)

        counts.head(6).plot(kind="pie", ax=axes[1], autopct="%1.1f%%")
        axes[1].set_title(f"Pie - {col}")
        axes[1].set_ylabel("")

        fig.tight_layout()

        # Column names come from user data and may contain path separators or
        # other characters that are unsafe in a file name; the index prefix
        # keeps names unique after sanitising.
        safe_name = re.sub(r"[^\w.-]+", "_", str(col))
        path = f"/tmp/analysis_{idx}_{safe_name}.png"
        fig.savefig(path)
        plt.close(fig)
        images.append(path)

    return images
| # ========================= | |
| # CONFUSION MATRIX | |
| # ========================= | |
def plot_cm(y_true, y_pred, title):
    """Draw the confusion matrix for the given labels and save it as a PNG.

    The matrix is shown as a "Blues" heat map with each cell's count written
    at its centre. The image is written to ``/tmp/<title>.png``.

    Returns:
        Path of the saved image.
    """
    matrix = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.imshow(matrix, cmap="Blues")
    ax.set_title(title)

    # Rows are drawn along y and columns along x, hence text(col, row, ...).
    for (row, col), count in np.ndenumerate(matrix):
        ax.text(col, row, count, ha="center", va="center")

    path = f"/tmp/{title}.png"
    fig.savefig(path)
    plt.close(fig)
    return path
| # ========================= | |
| # ML (NO / CW / SMOTE) | |
| # ========================= | |
def _build_model(name, balanced=False):
    """Return a fresh, unfitted classifier for the given display name."""
    class_weight = "balanced" if balanced else None
    if name == "Decision Tree":
        return DecisionTreeClassifier(class_weight=class_weight, random_state=42)
    if name == "Random Forest":
        return RandomForestClassifier(class_weight=class_weight, random_state=42)
    # XGBClassifier takes no class_weight argument; run_ml balances it through
    # per-sample weights passed to fit() instead.
    return XGBClassifier(eval_metric="logloss", random_state=42)


def _metric_row(name, y_true, y_pred):
    """Build one metrics-table row (weighted averages) for a model."""
    return {
        "Model": name,
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, average="weighted", zero_division=0),
        "Recall": recall_score(y_true, y_pred, average="weighted", zero_division=0),
        "F1": f1_score(y_true, y_pred, average="weighted", zero_division=0),
    }


def run_ml(target):
    """Train three classifiers under three imbalance strategies and score them.

    Strategies: no sampling, class weighting, and SMOTE oversampling (applied
    only when the minority/majority class ratio is below 0.5). Every
    model/strategy pair is evaluated on the same 20% hold-out split.

    Side effects: stores the three metric tables, the confusion-matrix image
    paths, and the best model (highest accuracy over all nine runs) in the
    module-level globals.

    Args:
        target: Name of the column to predict.

    Returns:
        ``(status text, no-sampling table, class-weight table, SMOTE table,
        list of confusion-matrix image paths)``.

    Raises:
        ValueError: If no dataset is loaded or ``target`` is not a column.
    """
    global best_model_name, best_model_obj
    global no_global, cw_global, smote_global, cm_global

    if df_global is None:
        raise ValueError("Upload a dataset before running the analysis.")
    if target is None or target not in df_global.columns:
        raise ValueError("Select a valid target column before running the analysis.")

    # Imported here (after validation) so the block carries its own dependency.
    from sklearn.utils.class_weight import compute_sample_weight

    df = df_global.copy()

    # Encode non-numeric features as integer codes.
    X = df.drop(columns=[target])
    for col in X.columns:
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = LabelEncoder().fit_transform(X[col].astype(str))

    # Always encode the target to contiguous codes 0..k-1. Float, negative or
    # sparse numeric labels otherwise break the class counting below.
    raw_target = df[target]
    if not pd.api.types.is_numeric_dtype(raw_target):
        raw_target = raw_target.astype(str)
    y = pd.Series(LabelEncoder().fit_transform(raw_target), index=df.index, name=target)

    class_counts = y.value_counts()
    imbalance = bool(class_counts.min() / class_counts.max() < 0.5)

    # Stratify so the hold-out keeps the class proportions, but only when the
    # data is large enough for scikit-learn to allow it.
    n_classes = len(class_counts)
    n_test = int(np.ceil(0.2 * len(y)))
    can_stratify = (
        class_counts.min() >= 2
        and n_test >= n_classes
        and len(y) - n_test >= n_classes
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42,
        stratify=y if can_stratify else None,
    )

    # SMOTE needs k_neighbors < minority size, so shrink k for tiny classes
    # and skip oversampling entirely when no neighbour exists.
    X_res, y_res = X_train, y_train
    smote_note = ""
    if imbalance:
        minority = int(y_train.value_counts().min())
        if minority >= 2:
            sampler = SMOTE(random_state=42, k_neighbors=min(5, minority - 1))
            X_res, y_res = sampler.fit_resample(X_train, y_train)
        else:
            smote_note = " (SMOTE skipped: a class has fewer than 2 training samples)"

    balanced_weights = compute_sample_weight("balanced", y_train)

    model_names = ("Decision Tree", "Random Forest", "XGBoost")
    no_rows, cw_rows, smote_rows = [], [], []
    # (image-key suffix, plot-title tag, best-model label, rows, X, y, balanced)
    strategies = (
        ("no", "NO", "No", no_rows, X_train, y_train, False),
        ("cw", "CW", "Class Weight", cw_rows, X_train, y_train, True),
        ("smote", "SMOTE", "SMOTE", smote_rows, X_res, y_res, False),
    )

    cm_images = {}
    best_score, top_name, top_model = -1.0, None, None

    for key, tag, label, rows, X_fit, y_fit, balanced in strategies:
        for name in model_names:
            # A fresh estimator per run keeps the stored best model from being
            # refitted by a later strategy.
            model = _build_model(name, balanced=balanced)
            fit_kwargs = {}
            if balanced and name == "XGBoost":
                fit_kwargs["sample_weight"] = balanced_weights
            model.fit(X_fit, y_fit, **fit_kwargs)
            pred = model.predict(X_test)

            row = _metric_row(name, y_test, pred)
            rows.append(row)
            cm_images[f"{name}_{key}"] = plot_cm(y_test, pred, f"{name}_{tag}")

            if row["Accuracy"] > best_score:
                best_score = row["Accuracy"]
                top_name = f"{name} ({label})"
                top_model = model

    # Publish results only after every run succeeded.
    no_global = pd.DataFrame(no_rows)
    cw_global = pd.DataFrame(cw_rows)
    smote_global = pd.DataFrame(smote_rows)
    cm_global = cm_images
    best_model_name = top_name
    best_model_obj = top_model

    return (
        f"Imbalance: {imbalance}{smote_note}",
        no_global,
        cw_global,
        smote_global,
        list(cm_images.values()),
    )
| # ========================= | |
| # FEATURE IMPORTANCE | |
| # ========================= | |
def feature_importance():
    """Plot the feature importances of ``best_model_obj`` as a bar chart.

    Bars are sorted by importance and labelled with the model's
    ``feature_names_in_`` when the model exposes them; otherwise with the
    feature index.

    Returns:
        Path of the saved PNG, or ``None`` when ``best_model_obj`` has no
        ``feature_importances_`` attribute (for example when it is ``None``).
    """
    model = best_model_obj
    if not hasattr(model, "feature_importances_"):
        return None

    importances = np.asarray(model.feature_importances_)
    names = getattr(model, "feature_names_in_", None)
    if names is None or len(names) != len(importances):
        names = [str(i) for i in range(len(importances))]

    # Ascending order puts the most important feature at the top of a
    # horizontal bar chart.
    order = np.argsort(importances)
    positions = np.arange(len(order))

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.barh(positions, importances[order])
    ax.set_yticks(positions)
    ax.set_yticklabels([str(names[i]) for i in order])
    ax.set_xlabel("Importance")
    ax.set_title("Feature Importance")
    fig.tight_layout()

    path = "/tmp/feat.png"
    fig.savefig(path)
    plt.close(fig)
    return path
| # ========================= | |
| # PDF REPORT | |
| # ========================= | |
def generate_pdf():
    """Build the PDF report from the results stored by ``run_ml``.

    The report contains the best-model name, the three metric tables
    (no sampling, class weight, SMOTE), and every confusion-matrix image.

    Returns:
        Path of the generated PDF (``/tmp/report.pdf``).

    Raises:
        ValueError: If the analysis results are not available yet.
    """
    if any(result is None for result in (no_global, cw_global, smote_global, cm_global)):
        raise ValueError("Run the analysis before generating the PDF report.")

    path = "/tmp/report.pdf"
    doc = SimpleDocTemplate(path)
    styles = getSampleStyleSheet()

    elements = [
        Paragraph("AutoML Full Report", styles["Title"]),
        Spacer(1, 10),
        Paragraph(f"Best Model: {best_model_name}", styles["Heading2"]),
    ]

    def format_cell(value):
        # Four decimals keep the metric columns readable and narrow enough
        # for the page.
        return f"{value:.4f}" if isinstance(value, float) else str(value)

    def add_table(df, title):
        elements.append(Spacer(1, 10))
        elements.append(Paragraph(title, styles["Heading3"]))
        data = [df.columns.tolist()]
        data.extend([format_cell(v) for v in row] for row in df.values.tolist())
        table = Table(data)
        table.setStyle(TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.grey),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("GRID", (0, 0), (-1, -1), 0.5, colors.black),
        ]))
        elements.append(table)

    add_table(no_global, "No Sampling")
    add_table(cw_global, "Class Weight")
    add_table(smote_global, "SMOTE")

    elements.append(Spacer(1, 10))
    elements.append(Paragraph("Confusion Matrices", styles["Heading2"]))
    for name, img in cm_global.items():
        elements.append(Paragraph(name, styles["Normal"]))
        elements.append(Image(img, width=200, height=200))

    doc.build(elements)
    return path
| # ========================= | |
| # ANALYSIS | |
| # ========================= | |
def full_analysis(target):
    """Run the ML pipeline for ``target`` and pass its five results through.

    Returns:
        ``(status text, no-sampling table, class-weight table, SMOTE table,
        confusion-matrix images)`` exactly as produced by ``run_ml``.
    """
    return run_ml(target)
| # ========================= | |
| # UI | |
| # ========================= | |
# Gradio layout and event wiring for the app.
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Advanced AutoML System")

    # Dataset upload and preview.
    file = gr.File()
    upload_btn = gr.Button("Upload")
    status = gr.Textbox()
    preview = gr.Dataframe()

    # Target selection and model comparison results.
    target = gr.Dropdown(label="Target")
    run_btn = gr.Button("Run Full Analysis")
    ml_status = gr.Textbox()
    no_table = gr.Dataframe()
    cw_table = gr.Dataframe()
    smote_table = gr.Dataframe()
    gallery = gr.Gallery(columns=2)

    # Feature importance of the selected model.
    feat_btn = gr.Button("Feature Importance")
    feat_img = gr.Image()

    # PDF export.
    pdf_btn = gr.Button("Download PDF")
    pdf_file = gr.File()

    # upload_and_clean returns four values, the last two both being dropdown
    # updates, so `target` appears twice in the outputs.
    upload_btn.click(
        fn=upload_and_clean,
        inputs=file,
        outputs=[status, preview, target, target],
    )
    run_btn.click(
        fn=full_analysis,
        inputs=target,
        outputs=[ml_status, no_table, cw_table, smote_table, gallery],
    )
    feat_btn.click(fn=feature_importance, inputs=None, outputs=feat_img)
    pdf_btn.click(fn=generate_pdf, inputs=None, outputs=pdf_file)

demo.launch(share=True)