Spaces:

shimaa22
/

analysis_web

Running

App Files Files Community

analysis_web / app.py

shimaa22

Update app.py

d4b1c85 verified about 11 hours ago

raw

history blame contribute delete

9.54 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt

	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import LabelEncoder

	from sklearn.tree import DecisionTreeClassifier
	from sklearn.ensemble import RandomForestClassifier
	from xgboost import XGBClassifier

	from sklearn.metrics import (
	accuracy_score,
	precision_score,
	recall_score,
	f1_score,
	confusion_matrix
	)

	from imblearn.over_sampling import SMOTE

	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle
	from reportlab.lib import colors
	from reportlab.lib.styles import getSampleStyleSheet

	# =========================
	# GLOBALS
	# =========================
	# Cleaned dataset published by upload_and_clean(); None until a file is loaded.
	df_global = None

	# Name of the best-scoring model (set by run_ml) and the estimator object
	# that feature_importance() inspects.
	best_model_name = best_model_obj = None

	# Metric tables (one DataFrame per strategy) produced by the last run_ml() call.
	no_global = cw_global = smote_global = None

	# Mapping of confusion-matrix label -> saved image path from the last run_ml() call.
	cm_global = None


	# =========================
	# UPLOAD
	# =========================
	def upload_and_clean(file):
	    """Load the uploaded CSV, clean it, and publish it as ``df_global``.

	    Cleaning drops duplicate rows and columns that contain no data at all,
	    then fills missing values: numeric columns with the column median, all
	    other columns with the most frequent value.

	    Args:
	        file: Value delivered by the ``gr.File`` input. Either a path string
	            or an object whose ``name`` attribute is the path; ``None`` when
	            nothing has been uploaded.

	    Returns:
	        A 4-tuple matching the click handler's outputs: a status message,
	        the preview rows (``None`` on failure), and two dropdown updates.
	    """
	    global df_global

	    if file is None:
	        # Nothing uploaded yet: report it instead of crashing on ``file.name``.
	        return "Please upload a CSV file first", None, gr.update(), gr.update()

	    # Accept both a plain path string and an object exposing ``.name``.
	    path = getattr(file, "name", file)

	    try:
	        df = pd.read_csv(path)
	    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
	        return f"Could not read CSV: {err}", None, gr.update(), gr.update()

	    # A column with no values cannot be imputed (its median is NaN), so it
	    # would only carry NaNs into model training.
	    df = df.drop_duplicates().dropna(axis=1, how="all")

	    for col in df.columns:
	        if pd.api.types.is_numeric_dtype(df[col]):
	            fill_value = df[col].median()
	        else:
	            fill_value = df[col].mode().iloc[0]
	        df[col] = df[col].fillna(fill_value)

	    df_global = df

	    columns = list(df.columns)
	    return (
	        "Data Loaded Successfully",
	        df.head(),
	        gr.update(choices=columns),
	        gr.update(choices=columns),
	    )


	# =========================
	# ANALYSIS VISUALIZATION
	# =========================
	def analyze_data(target):
	    """Render a bar chart and a pie chart for up to six non-target columns.

	    Each column is treated as categorical (values are cast to ``str``); the
	    bar chart shows the 10 most frequent values and the pie chart the 6 most
	    frequent.

	    Args:
	        target: Name of the target column, which is excluded from the plots.

	    Returns:
	        List of PNG paths under ``/tmp``, one per plotted column. Empty when
	        no dataset has been loaded yet.
	    """
	    import re

	    if df_global is None:
	        return []

	    df = df_global
	    images = []

	    feature_cols = [c for c in df.columns if c != target]

	    for idx, col in enumerate(feature_cols[:6]):
	        # Count once and reuse for both charts.
	        counts = df[col].astype(str).value_counts()

	        fig, axes = plt.subplots(1, 2, figsize=(12, 4))

	        counts.head(10).plot(kind="bar", ax=axes[0])
	        axes[0].set_title(f"Bar - {col}")
	        axes[0].tick_params(axis="x", rotation=45)

	        counts.head(6).plot(kind="pie", ax=axes[1], autopct="%1.1f%%")
	        axes[1].set_title(f"Pie - {col}")
	        axes[1].set_ylabel("")

	        fig.tight_layout()

	        # Column names may contain path separators or other characters that
	        # are invalid in a filename; the index prefix keeps names unique
	        # after sanitising.
	        safe_name = re.sub(r"[^\w.-]", "_", str(col))
	        path = f"/tmp/{idx}_{safe_name}.png"
	        fig.savefig(path)
	        plt.close(fig)

	        images.append(path)

	    return images


	# =========================
	# CONFUSION MATRIX
	# =========================
	def plot_cm(y_true, y_pred, title):
	    """Draw the confusion matrix of ``y_pred`` against ``y_true`` as a PNG.

	    The image is titled ``title`` and written to ``/tmp/<title>.png``; that
	    path is returned.
	    """
	    matrix = confusion_matrix(y_true, y_pred)

	    plt.figure(figsize=(4, 4))
	    plt.imshow(matrix, cmap="Blues")
	    plt.title(title)

	    # Annotate every cell with its count (x is the column, y is the row).
	    for row, col in np.ndindex(*matrix.shape):
	        plt.text(col, row, matrix[row, col], ha="center", va="center")

	    out_path = f"/tmp/{title}.png"
	    plt.savefig(out_path)
	    plt.close()
	    return out_path


	# =========================
	# ML (NO / CW / SMOTE)
	# =========================
	def _score_row(name, y_true, y_pred):
	    """Build one metrics-table row using weighted precision/recall/F1."""
	    weighted = {"average": "weighted", "zero_division": 0}
	    return {
	        "Model": name,
	        "Accuracy": accuracy_score(y_true, y_pred),
	        "Precision": precision_score(y_true, y_pred, **weighted),
	        "Recall": recall_score(y_true, y_pred, **weighted),
	        "F1": f1_score(y_true, y_pred, **weighted),
	    }


	def _balanced_sample_weight(y):
	    """Return per-sample weights inversely proportional to class frequency."""
	    y = np.asarray(y)
	    counts = np.bincount(y)
	    n_present = np.count_nonzero(counts)
	    return len(y) / (n_present * counts[y])


	def run_ml(target):
	    """Train and compare three classifiers under three imbalance strategies.

	    Decision Tree, Random Forest and XGBoost are each fitted three times:
	    on the raw training split, with balanced class weighting, and on a
	    SMOTE-resampled training split (resampling is applied only when the
	    class ratio min/max is below 0.5). Every fit is scored on the same
	    held-out 20% test split.

	    Side effects: stores the three metric tables, the confusion-matrix image
	    paths, and the best model (highest test accuracy over all nine fits) in
	    the module-level globals.

	    Args:
	        target: Name of the column to predict.

	    Returns:
	        Tuple of (status text, no-sampling table, class-weight table,
	        SMOTE table, list of confusion-matrix image paths).

	    Raises:
	        gr.Error: If no dataset is loaded or ``target`` is not a column.
	    """
	    global best_model_name, best_model_obj
	    global no_global, cw_global, smote_global, cm_global

	    if df_global is None:
	        raise gr.Error("Please upload a dataset first.")
	    if target is None or target not in df_global.columns:
	        raise gr.Error("Please select a valid target column.")

	    df = df_global.copy()

	    # Encode non-numeric feature columns as integer codes.
	    for col in df.columns:
	        if col != target and not pd.api.types.is_numeric_dtype(df[col]):
	            df[col] = LabelEncoder().fit_transform(df[col].astype(str))

	    # Always encode the target to contiguous 0..k-1 integers: np.bincount
	    # rejects floats, XGBoost requires contiguous labels, and gaps in the
	    # label range would otherwise appear as zero-count classes in the
	    # imbalance ratio.
	    raw_target = df[target]
	    if not pd.api.types.is_numeric_dtype(raw_target):
	        raw_target = raw_target.astype(str)
	    y = LabelEncoder().fit_transform(raw_target)
	    X = df.drop(columns=[target])

	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=0.2, random_state=42
	    )

	    # imbalance check
	    counts = np.bincount(y)
	    imbalance = min(counts) / max(counts) < 0.5

	    # Factories rather than instances: every fit gets a fresh estimator, so
	    # the retained best model is never overwritten by a later refit.
	    plain_models = {
	        "Decision Tree": DecisionTreeClassifier,
	        "Random Forest": RandomForestClassifier,
	        "XGBoost": lambda: XGBClassifier(eval_metric="logloss"),
	    }
	    weighted_models = {
	        "Decision Tree": lambda: DecisionTreeClassifier(class_weight="balanced"),
	        "Random Forest": lambda: RandomForestClassifier(class_weight="balanced"),
	        "XGBoost": lambda: XGBClassifier(eval_metric="logloss"),
	    }

	    no_rows, cw_rows, smote_rows = [], [], []
	    cm_images = {}
	    best = {"score": -1.0, "name": None, "model": None}

	    def evaluate(model, name, tag, label, rows, X_fit, y_fit, **fit_kwargs):
	        """Fit, score on the test split, record the row/plot, track the best."""
	        model.fit(X_fit, y_fit, **fit_kwargs)
	        pred = model.predict(X_test)

	        row = _score_row(name, y_test, pred)
	        rows.append(row)
	        cm_images[f"{name}_{tag.lower()}"] = plot_cm(y_test, pred, f"{name}_{tag}")

	        if row["Accuracy"] > best["score"]:
	            best.update(score=row["Accuracy"], name=f"{name} ({label})", model=model)

	    # =========================
	    # NO SAMPLING
	    # =========================
	    for name, make_model in plain_models.items():
	        evaluate(make_model(), name, "NO", "No", no_rows, X_train, y_train)

	    # =========================
	    # CLASS WEIGHT
	    # =========================
	    for name, make_model in weighted_models.items():
	        fit_kwargs = {}
	        if name == "XGBoost":
	            # XGBClassifier has no class_weight option; balanced per-sample
	            # weights passed to fit() give the equivalent effect.
	            fit_kwargs["sample_weight"] = _balanced_sample_weight(y_train)
	        evaluate(
	            make_model(), name, "CW", "Class Weight", cw_rows,
	            X_train, y_train, **fit_kwargs
	        )

	    # =========================
	    # SMOTE
	    # =========================
	    X_res, y_res = X_train, y_train
	    smote_skipped = False
	    if imbalance:
	        train_counts = np.bincount(y_train)
	        smallest = int(train_counts[train_counts > 0].min())
	        # SMOTE needs k_neighbors < size of the smallest class; the library
	        # default of 5 fails on very small minority classes.
	        k_neighbors = min(5, smallest - 1)
	        if k_neighbors >= 1:
	            sm = SMOTE(random_state=42, k_neighbors=k_neighbors)
	            X_res, y_res = sm.fit_resample(X_train, y_train)
	        else:
	            smote_skipped = True

	    for name, make_model in plain_models.items():
	        evaluate(make_model(), name, "SMOTE", "SMOTE", smote_rows, X_res, y_res)

	    # store globally
	    no_global = pd.DataFrame(no_rows)
	    cw_global = pd.DataFrame(cw_rows)
	    smote_global = pd.DataFrame(smote_rows)
	    cm_global = cm_images
	    best_model_name = best["name"]
	    best_model_obj = best["model"]

	    status = f"Imbalance: {imbalance}"
	    if smote_skipped:
	        status += " (SMOTE skipped: smallest class has fewer than 2 training samples)"

	    return (
	        status,
	        no_global,
	        cw_global,
	        smote_global,
	        list(cm_images.values())
	    )


	# =========================
	# FEATURE IMPORTANCE
	# =========================
	def feature_importance():
	    """Plot the feature importances of the stored best model.

	    Reads the module-level ``best_model_obj``. Bars are labelled with the
	    model's ``feature_names_in_`` when that attribute is available and its
	    length matches; otherwise they are left indexed by position.

	    Returns:
	        Path of the saved plot (``/tmp/feat.png``), or ``None`` when the
	        stored object exposes no ``feature_importances_``.
	    """
	    if not hasattr(best_model_obj, "feature_importances_"):
	        return None

	    importances = best_model_obj.feature_importances_
	    positions = range(len(importances))

	    plt.figure(figsize=(6, 4))
	    plt.barh(positions, importances)

	    # Without names the bars cannot be interpreted; the attribute is only
	    # present when the estimator was fitted on named columns.
	    names = getattr(best_model_obj, "feature_names_in_", None)
	    if names is not None and len(names) == len(importances):
	        plt.yticks(positions, names)

	    plt.title(f"Feature Importance - {best_model_name}")
	    plt.tight_layout()

	    path = "/tmp/feat.png"
	    plt.savefig(path)
	    plt.close()

	    return path


	# =========================
	# PDF REPORT
	# =========================
	def generate_pdf():
	    """Write the last analysis results to ``/tmp/report.pdf``.

	    The report contains the best model name, one metrics table per strategy
	    (no sampling, class weight, SMOTE) and every stored confusion-matrix
	    image, all read from the module-level globals.

	    Returns:
	        Path of the generated PDF.

	    Raises:
	        gr.Error: If the analysis has not been run yet, so there are no
	            results to report.
	    """
	    results = (no_global, cw_global, smote_global, cm_global)
	    if any(item is None for item in results):
	        raise gr.Error("Run the full analysis before generating the report.")

	    path = "/tmp/report.pdf"
	    doc = SimpleDocTemplate(path)
	    styles = getSampleStyleSheet()
	    elements = []

	    elements.append(Paragraph("AutoML Full Report", styles["Title"]))
	    elements.append(Spacer(1, 10))
	    elements.append(Paragraph(f"Best Model: {best_model_name}", styles["Heading2"]))

	    def add_table(df, title):
	        """Append a titled, gridded table with a grey header row."""
	        elements.append(Spacer(1, 10))
	        elements.append(Paragraph(title, styles["Heading3"]))

	        # Round the metric columns so the cells stay readable; non-numeric
	        # columns are left untouched by DataFrame.round.
	        rounded = df.round(4)
	        data = [rounded.columns.tolist()] + rounded.values.tolist()

	        table = Table(data)
	        table.setStyle(TableStyle([
	            ("BACKGROUND", (0, 0), (-1, 0), colors.grey),
	            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
	            ("GRID", (0, 0), (-1, -1), 0.5, colors.black),
	        ]))

	        elements.append(table)

	    add_table(no_global, "No Sampling")
	    add_table(cw_global, "Class Weight")
	    add_table(smote_global, "SMOTE")

	    elements.append(Spacer(1, 10))
	    elements.append(Paragraph("Confusion Matrices", styles["Heading2"]))

	    for name, img in cm_global.items():
	        elements.append(Paragraph(name, styles["Normal"]))
	        elements.append(Image(img, width=200, height=200))

	    doc.build(elements)

	    return path


	# =========================
	# ANALYSIS
	# =========================
	def full_analysis(target):
	    """Run the ML comparison for ``target`` and pass its results through.

	    Returns the 5-tuple produced by ``run_ml``: status text, the three
	    metric tables, and the list of confusion-matrix image paths.
	    """
	    return run_ml(target)


	# =========================
	# UI
	# =========================
	with gr.Blocks() as demo:
	    gr.Markdown("# 🚀 Advanced AutoML System")

	    # --- Dataset upload and preview ---
	    file = gr.File()
	    upload_btn = gr.Button("Upload")
	    status = gr.Textbox()
	    preview = gr.Dataframe()

	    # --- Target selection and analysis trigger ---
	    target = gr.Dropdown(label="Target")
	    run_btn = gr.Button("Run Full Analysis")

	    # --- Analysis results: status, one table per strategy, confusion matrices ---
	    ml_status = gr.Textbox()
	    no_table = gr.Dataframe()
	    cw_table = gr.Dataframe()
	    smote_table = gr.Dataframe()
	    gallery = gr.Gallery(columns=2)

	    # --- Feature importance ---
	    feat_btn = gr.Button("Feature Importance")
	    feat_img = gr.Image()

	    # --- PDF report ---
	    pdf_btn = gr.Button("Download PDF")
	    pdf_file = gr.File()

	    # upload_and_clean returns two dropdown updates, so ``target`` is listed
	    # twice to match its four return values.
	    upload_btn.click(
	        fn=upload_and_clean,
	        inputs=file,
	        outputs=[status, preview, target, target],
	    )

	    run_btn.click(
	        fn=full_analysis,
	        inputs=target,
	        outputs=[ml_status, no_table, cw_table, smote_table, gallery],
	    )

	    feat_btn.click(fn=feature_importance, inputs=None, outputs=feat_img)

	    pdf_btn.click(fn=generate_pdf, inputs=None, outputs=pdf_file)

	demo.launch(share=True)