Upload 3 files

82dec99 verified 7 days ago

38.1 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import re
	import io
	import os
	import joblib
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import seaborn as sns
	from datetime import datetime

	from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
	from sklearn.preprocessing import LabelEncoder, StandardScaler, RobustScaler
	from sklearn.metrics import (
	accuracy_score, confusion_matrix, silhouette_score,
	classification_report, f1_score, precision_score, recall_score
	)
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.svm import SVC
	from sklearn.linear_model import LogisticRegression
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.cluster import KMeans
	from sklearn.feature_selection import mutual_info_classif
	from sklearn.utils import resample

	# ==========================================================
	# PAGE CONFIG
	# ==========================================================
	st.set_page_config(
	page_title="AI AutoML Platform",
	page_icon="🤖",
	layout="wide"
	)

	# ==========================================================
	# SESSION STATE
	# ==========================================================
	if "history" not in st.session_state:
	st.session_state.history = []

	if "last_model_name" not in st.session_state:
	st.session_state.last_model_name = None

	if "last_score" not in st.session_state:
	st.session_state.last_score = None
	#store detailed results per model run for reports
	if "model_results" not in st.session_state:
	st.session_state.model_results = []

	#store selected target so report can reference it
	if "selected_target" not in st.session_state:
	st.session_state.selected_target = None

	# store the cleaned df reference for report generation
	if "cleaned_df" not in st.session_state:
	st.session_state.cleaned_df = None

	# ==========================================================
	# THEME CSS
	# ==========================================================
	st.markdown("""
	<style>
	.stApp {
	background: linear-gradient(135deg,#0f172a,#111827,#020617);
	color: white;
	}
	.big-title {
	font-size: 42px;
	font-weight: 800;
	color: #38bdf8;
	text-align:center;
	padding:15px;
	}
	.sub-title {
	text-align:center;
	color:#cbd5e1;
	font-size:18px;
	margin-bottom:25px;
	}
	.section {
	background:#0f172a;
	padding:12px;
	border-radius:12px;
	color:#38bdf8;
	font-weight:700;
	font-size:24px;
	margin-top:20px;
	}
	.stButton>button {
	background:#38bdf8;
	color:black;
	border:none;
	border-radius:10px;
	font-weight:700;
	}
	.stButton>button:hover {
	background:#0ea5e9;
	color:white;
	}
	div[data-baseweb="select"] > div {
	background:#1e293b !important;
	color:white !important;
	}
	.model-result-box {
	background:#1e293b;
	padding:20px;
	border-radius:12px;
	border:2px solid #38bdf8;
	margin:15px 0;
	}
	/* File Uploader Button */
	.stFileUploader>div>div>button {
	background:#38bdf8 !important;
	color:black !important;
	border:none !important;
	border-radius:10px !important;
	font-weight:700 !important;
	}
	.stFileUploader>div>div>button:hover {
	background:#0ea5e9 !important;
	color:white !important;
	}
	/* File Uploader Button Alternative Selectors */
	.stFileUploader button {
	background:#38bdf8 !important;
	color:black !important;
	border:none !important;
	border-radius:10px !important;
	font-weight:700 !important;
	}
	.stFileUploader button:hover {
	background:#0ea5e9 !important;
	color:white !important;
	}
	/* Download Buttons */
	.stDownloadButton>button {
	background:#38bdf8 !important;
	color:black !important;
	border:none !important;
	border-radius:10px !important;
	font-weight:700 !important;
	}
	.stDownloadButton>button:hover {
	background:#0ea5e9 !important;
	color:white !important;
	}
	/* File Uploader Label */
	.stFileUploader label {
	color:#38bdf8 !important;
	font-size:16px !important;
	font-weight:700 !important;
	}
	/* Selectbox Labels */
	.stSelectbox label {
	color:#38bdf8 !important;
	font-size:16px !important;
	font-weight:700 !important;
	}
	/* Text and Write Styling */
	p {
	color:#cbd5e1;
	}
	.stWrite {
	color:#cbd5e1;
	}
	/* Center pyplot figures and add lateral padding */
	.stPlotlyChart, .stPyplot {
	display: flex;
	justify-content: center;
	}
	.stPyplot {
	padding: 0 50px;
	}
	.stPlotlyChart {
	padding: 0 50px;
	}
	/* Centered containers */
	.stContainer {
	max-width: 95%;
	margin-left: auto;
	margin-right: auto;
	}
	/* Classification Report Text */
	.stText {
	color: white !important;
	}
	.stText pre {
	color: white !important;
	}
	.stText * {
	color: white !important;
	}
	</style>
	""", unsafe_allow_html=True)

	# ==========================================================
	# HEADER
	# ==========================================================
	st.markdown('<div class="big-title">🤖 AI AutoML Platform</div>', unsafe_allow_html=True)
	st.markdown('<div class="sub-title">upload csv select model download trained model</div>', unsafe_allow_html=True)

	# ==========================================================
	# HELPERS
	# ==========================================================
	def smart_clean(df):
	df = df.copy()
	df = df.drop_duplicates()

	for col in df.columns:
	if df[col].dtype == "object":
	df[col] = df[col].fillna(df[col].mode()[0])
	else:
	# use median instead of mean (more robust to outliers)
	df[col] = df[col].fillna(df[col].median())

	return df


	def convert_units(value):
	try:
	txt = str(value).lower().strip()

	nums = re.findall(r'[\d.]+', txt)
	if not nums:
	return value

	num = float(nums[0])

	if "km" in txt:
	return num * 1000
	elif "cm" in txt:
	return num / 100
	elif "mm" in txt:
	return num / 1000
	elif "m" in txt:
	return num
	else:
	return num
	except:
	return value


	def detect_unit_columns(df):
	df = df.copy()

	for col in df.columns:
	if df[col].dtype == "object":
	sample = str(df[col].iloc[0]).lower()

	if any(x in sample for x in ["km", "cm", "mm", " m"]):
	df[col] = df[col].apply(convert_units)

	return df


	def detect_best_target(df):
	scores = {}

	for col in df.columns:
	score = 0
	unique = df[col].nunique()
	ratio = unique / len(df)

	if 2 <= unique <= 15:
	score += 6

	if df[col].dtype == "object":
	score += 3

	if ratio > 0.9:
	score -= 10

	if unique > 50:
	score -= 5

	scores[col] = score

	best = max(scores, key=scores.get)
	ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)

	return best, ranked[:5]


	def prepare_for_supervised(df, target):
	data = df.copy()

	for col in data.columns:
	if data[col].dtype == "object":
	le = LabelEncoder()
	data[col] = le.fit_transform(data[col].astype(str))

	X = data.drop(columns=[target])
	y = data[target]

	return X, y, data


	# --- ACCURACY HELPER FUNCTIONS ---

	def clip_outliers_iqr(df):
	"""Clip outliers using IQR method instead of removing rows."""
	df = df.copy()
	info = {}
	for col in df.select_dtypes(include=[np.number]).columns:
	Q1 = df[col].quantile(0.25)
	Q3 = df[col].quantile(0.75)
	IQR = Q3 - Q1
	lower = Q1 - 1.5 * IQR
	upper = Q3 + 1.5 * IQR
	n_out = ((df[col] < lower) \| (df[col] > upper)).sum()
	if n_out > 0:
	df[col] = df[col].clip(lower=lower, upper=upper)
	info[col] = n_out
	return df, info


	def remove_low_variance(X, threshold=0.01):
	"""Remove features with near-zero variance."""
	variances = X.var()
	low = variances[variances < threshold].index.tolist()
	if low:
	X = X.drop(columns=low)
	return X, low


	def remove_high_correlation(X, threshold=0.95):
	"""Remove one of each pair of highly correlated features."""
	corr = X.corr().abs()
	upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
	to_drop = [c for c in upper.columns if any(upper[c] > threshold)]
	if to_drop:
	X = X.drop(columns=to_drop)
	return X, to_drop


	def balance_classes(X, y):
	"""Oversample minority classes to match majority count."""
	classes, counts = np.unique(y, return_counts=True)
	if len(classes) < 2:
	return X, y, False

	max_count = counts.max()
	ratio = max_count / counts.min()
	if ratio < 2:
	return X, y, False

	X_out = X.copy()
	y_out = y.copy()

	for cls, cnt in zip(classes, counts):
	if cnt < max_count:
	idx = y[y == cls].index
	extra = resample(X.loc[idx], replace=True, n_samples=max_count - cnt, random_state=42)
	y_extra = pd.Series([cls] * (max_count - cnt), index=extra.index)
	X_out = pd.concat([X_out, extra])
	y_out = pd.concat([y_out, y_extra])

	return X_out, y_out, True


	def select_top_features(X, y, max_features=20):
	"""Select top features by mutual information."""
	if X.shape[1] <= max_features:
	return X, list(X.columns)

	mi = mutual_info_classif(X, y, random_state=42)
	top = pd.Series(mi, index=X.columns).sort_values(ascending=False).head(max_features).index.tolist()
	return X[top], top


	def preprocess_for_model(df, target):
	"""Full accuracy-boosting preprocessing pipeline."""
	X, y, transformed = prepare_for_supervised(df, target)

	# Clip outliers
	transformed_clipped, outlier_info = clip_outliers_iqr(transformed)
	X = transformed_clipped.drop(columns=[target])
	y = transformed_clipped[target]

	# Remove low variance
	X, low_var = remove_low_variance(X)

	# Remove high correlation
	X, high_corr = remove_high_correlation(X)

	# Balance classes
	X, y, balanced = balance_classes(X, y)

	# Feature selection
	X, selected = select_top_features(X, y)

	return X, y, transformed, {
	"outliers_clipped": outlier_info,
	"low_var_removed": low_var,
	"high_corr_removed": high_corr,
	"class_balanced": balanced,
	"features_used": list(X.columns),
	}


	def show_confusion(y_true, y_pred, title):
	fig, ax = plt.subplots(figsize=(5,4))
	cm = confusion_matrix(y_true, y_pred)

	sns.heatmap(
	cm,
	annot=True,
	fmt="d",
	cmap="Blues",
	linewidths=1
	)

	plt.title(title)
	plt.xlabel("Predicted")
	plt.ylabel("Actual")

	col1, col2, col3 = st.columns([1, 2, 1])
	with col2:
	st.pyplot(fig)
	return fig


	def compact_bar(labels, values, title):
	fig, ax = plt.subplots(figsize=(6,3))

	sns.barplot(x=labels, y=values)

	plt.xticks(rotation=20)
	plt.title(title)

	col1, col2, col3 = st.columns([1, 2, 1])
	with col2:
	st.pyplot(fig)
	return fig


	def save_result(name, score, target_col, features_used, extra_info=None):
	"""Enhanced save_result that stores all details for reporting."""
	st.session_state.last_model_name = name
	st.session_state.last_score = score

	entry = {
	"Model": name,
	"Score": score,
	"Target": target_col,
	"Features": features_used,
	"Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
	}

	if extra_info:
	entry.update(extra_info)

	st.session_state.history.append(entry)
	st.session_state.model_results.append(entry)


	# --- REPORT GENERATORS ---

	def generate_text_report(df, target, model_results):
	"""Generate a comprehensive TXT report with every detail."""
	best = max(model_results, key=lambda x: x["Score"]) if model_results else None

	lines = []
	lines.append("=" * 70)
	lines.append(" DARK AI AUTOML PLATFORM - FULL REPORT")
	lines.append("=" * 70)
	lines.append(f" Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
	lines.append("")
	lines.append("-" * 70)
	lines.append(" DATASET SUMMARY")
	lines.append("-" * 70)
	lines.append(f" Rows: {df.shape[0]}")
	lines.append(f" Columns: {df.shape[1]}")
	lines.append(f" Target Column: {target}")
	lines.append(f" Target Unique Values: {df[target].nunique()}")
	lines.append("")

	lines.append("-" * 70)
	lines.append(" COLUMN DETAILS")
	lines.append("-" * 70)
	for col in df.columns:
	dtype = str(df[col].dtype)
	nunique = df[col].nunique()
	missing = df[col].isnull().sum()
	lines.append(f" {col}: type={dtype}, unique={nunique}, missing={missing}")
	lines.append("")

	lines.append("-" * 70)
	lines.append(" MODEL RESULTS (ALL RUNS)")
	lines.append("-" * 70)
	for i, r in enumerate(model_results, 1):
	lines.append("")
	lines.append(f" Run #{i}")
	lines.append(f" Model: {r['Model']}")
	lines.append(f" Accuracy/Score: {r['Score']:.2f}%")
	lines.append(f" Target Feature: {r.get('Target', 'N/A')}")
	lines.append(f" Features Used: {r.get('Features', 'N/A')}")
	lines.append(f" Timestamp: {r.get('Timestamp', 'N/A')}")
	if "Precision" in r:
	lines.append(f" Precision: {r['Precision']:.2f}%")
	if "Recall" in r:
	lines.append(f" Recall: {r['Recall']:.2f}%")
	if "F1Score" in r:
	lines.append(f" F1 Score: {r['F1Score']:.2f}%")
	if "BestParams" in r:
	lines.append(f" Best Hyperparameters: {r['BestParams']}")
	if "OutliersClipped" in r:
	lines.append(f" Outliers Clipped: {r['OutliersClipped']} columns")
	if "LowVarRemoved" in r:
	lines.append(f" Low Variance Features Removed: {r['LowVarRemoved']}")
	if "HighCorrRemoved" in r:
	lines.append(f" High Correlation Features Removed: {r['HighCorrRemoved']}")
	if "ClassBalanced" in r:
	lines.append(f" Class Balancing Applied: {r['ClassBalanced']}")
	if "BestK" in r:
	lines.append(f" Optimal Clusters (k): {r['BestK']}")

	if best:
	lines.append("")
	lines.append("-" * 70)
	lines.append(" BEST MODEL")
	lines.append("-" * 70)
	lines.append(f" Model: {best['Model']}")
	lines.append(f" Score: {best['Score']:.2f}%")
	lines.append(f" Target: {best.get('Target', 'N/A')}")

	lines.append("")
	lines.append("-" * 70)
	lines.append(" PREPROCESSING PIPELINE")
	lines.append("-" * 70)
	lines.append(" - Duplicate removal")
	lines.append(" - Missing values handled (median for numeric, mode for categorical)")
	lines.append(" - Unit conversion (km/cm/mm -> m)")
	lines.append(" - Categorical encoding (LabelEncoder)")
	lines.append(" - Outlier clipping (IQR method)")
	lines.append(" - Low variance feature removal")
	lines.append(" - High correlation feature removal")
	lines.append(" - Class imbalance handling (oversampling)")
	lines.append(" - Feature selection (mutual information, top 20)")
	lines.append(" - Scaling where required (StandardScaler / RobustScaler)")
	lines.append(" - Hyperparameter tuning (GridSearchCV)")
	lines.append(" - Stratified cross-validation (5-fold)")
	lines.append("")
	lines.append("=" * 70)
	lines.append(" END OF REPORT")
	lines.append("=" * 70)

	return "\n".join(lines)


	def generate_xlsx_report(df, target, model_results):
	"""Generate a multi-sheet XLSX report with every detail."""
	output = io.BytesIO()

	with pd.ExcelWriter(output, engine="openpyxl") as writer:
	# Sheet 1: Dataset Summary
	summary = pd.DataFrame({
	"Property": ["Rows", "Columns", "Target Column", "Target Unique Values"],
	"Value": [df.shape[0], df.shape[1], target, df[target].nunique()]
	})
	summary.to_excel(writer, sheet_name="Dataset Summary", index=False)

	# Sheet 2: Column Details
	col_details = []
	for col in df.columns:
	col_details.append({
	"Column": col,
	"Type": str(df[col].dtype),
	"Unique Values": df[col].nunique(),
	"Missing Values": df[col].isnull().sum(),
	})
	pd.DataFrame(col_details).to_excel(writer, sheet_name="Column Details", index=False)

	# Sheet 3: Model Results
	results_df = pd.DataFrame(model_results)
	results_df.to_excel(writer, sheet_name="Model Results", index=False)

	# Sheet 4: Best Model
	if model_results:
	best = max(model_results, key=lambda x: x["Score"])
	pd.DataFrame([best]).to_excel(writer, sheet_name="Best Model", index=False)

	output.seek(0)
	return output


	# ==========================================================
	# UPLOAD
	# ==========================================================
	st.markdown('<div class="section">📁 Upload Dataset</div>', unsafe_allow_html=True)

	file = st.file_uploader("Upload CSV File", type=["csv"])

	# ==========================================================
	# MAIN APP
	# ==========================================================
	if file:

	raw = pd.read_csv(file)

	st.markdown('<div class="section">📌 Dataset Preview</div>', unsafe_allow_html=True)
	st.dataframe(raw.head(), use_container_width=True)

	df = smart_clean(raw)
	df = detect_unit_columns(df)

	st.session_state.cleaned_df = df

	# ------------------------------------------------------
	# TARGET DETECTION
	# ------------------------------------------------------
	st.markdown('<div class="section">🎯 AI Target Detection</div>', unsafe_allow_html=True)

	best_target, top5 = detect_best_target(df)

	st.success(f"Recommended Target Column: {best_target}")

	st.write("Top Suggestions:")

	for n, s in top5:
	st.write(f"• {n} (score: {s})")

	# Dropdown with AI recommendation pre-selected, user can override
	target = st.selectbox(
	"Choose Target Column (AI recommended is pre-selected - change if needed)",
	[best_target] + [c for c in df.columns if c != best_target]
	)

	st.session_state.selected_target = target

	# ------------------------------------------------------
	# MODEL SELECT
	# ------------------------------------------------------
	st.markdown('<div class="section">🤖 Choose Model</div>', unsafe_allow_html=True)

	model_choice = st.selectbox(
	"Select One Model",
	[
	"Random Forest",
	"SVM",
	"Logistic Regression",
	"Decision Tree",
	"KMeans Clustering"
	]
	)

	# ------------------------------------------------------
	# APPLY MODEL
	# ------------------------------------------------------
	if st.button("🚀 Apply Model"):

	# Each model result is in its own container so
	# applying a second model shows results separately beneath the first

	# RANDOM FOREST
	if model_choice == "Random Forest":

	X, y, transformed, pp_info = preprocess_for_model(df, target)
	features_used = pp_info["features_used"]

	result_box = st.container()
	with result_box:
	st.markdown('<div class="model-result-box">', unsafe_allow_html=True)
	st.markdown(f"### Random Forest Results (Target: {target})")

	col1, col2 = st.columns(2)

	with col1:
	st.write("Original")
	st.dataframe(raw.head())

	with col2:
	st.write("Processed")
	st.dataframe(transformed.head())

	X_train, X_test, y_train, y_test = train_test_split(
	X, y, test_size=0.2, random_state=42, stratify=y
	)

	cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

	model = GridSearchCV(
	RandomForestClassifier(),
	{
	"n_estimators":[100,200,300],
	"max_depth":[5,10,15,None],
	"min_samples_split":[2,5],
	"min_samples_leaf":[1,2]
	},
	cv=cv,
	n_jobs=-1
	)

	model.fit(X_train, y_train)

	pred = model.predict(X_test)

	acc = accuracy_score(y_test, pred)*100
	prec = precision_score(y_test, pred, average="weighted", zero_division=0)*100
	rec = recall_score(y_test, pred, average="weighted", zero_division=0)*100
	f1 = f1_score(y_test, pred, average="weighted", zero_division=0)*100

	st.success(f"Accuracy: {acc:.2f}%")
	st.info(f"Precision: {prec:.2f}% \| Recall: {rec:.2f}% \| F1: {f1:.2f}%")

	show_confusion(y_test, pred, "Random Forest Matrix")

	imp = pd.Series(
	model.best_estimator_.feature_importances_,
	index=X.columns
	).sort_values(ascending=False).head(8)

	compact_bar(imp.index, imp.values, "Feature Importance")

	st.write("Classification Report:")
	st.text(classification_report(y_test, pred, zero_division=0))

	st.markdown('</div>', unsafe_allow_html=True)

	joblib.dump(model.best_estimator_, "random_forest.pkl")

	save_result("Random Forest", acc, target, ", ".join(features_used), {
	"Precision": prec,
	"Recall": rec,
	"F1Score": f1,
	"BestParams": str(model.best_params_),
	"OutliersClipped": len(pp_info["outliers_clipped"]),
	"LowVarRemoved": str(pp_info["low_var_removed"]),
	"HighCorrRemoved": str(pp_info["high_corr_removed"]),
	"ClassBalanced": pp_info["class_balanced"],
	})

	# SVM
	elif model_choice == "SVM":

	X, y, transformed, pp_info = preprocess_for_model(df, target)
	features_used = pp_info["features_used"]

	result_box = st.container()
	with result_box:
	st.markdown('<div class="model-result-box">', unsafe_allow_html=True)
	st.markdown(f"### SVM Results (Target: {target})")

	X_train, X_test, y_train, y_test = train_test_split(
	X, y, test_size=0.2, random_state=42, stratify=y
	)

	# RobustScaler for SVM (handles outliers better)
	sc = RobustScaler()

	X_train = sc.fit_transform(X_train)
	X_test = sc.transform(X_test)

	cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

	model = GridSearchCV(
	SVC(),
	{
	"C":[0.1,1,10,100],
	"kernel":["rbf","linear","poly"],
	"gamma":["scale","auto"]
	},
	cv=cv,
	n_jobs=-1
	)

	model.fit(X_train, y_train)

	pred = model.predict(X_test)

	acc = accuracy_score(y_test, pred)*100
	prec = precision_score(y_test, pred, average="weighted", zero_division=0)*100
	rec = recall_score(y_test, pred, average="weighted", zero_division=0)*100
	f1 = f1_score(y_test, pred, average="weighted", zero_division=0)*100

	st.success(f"Accuracy: {acc:.2f}%")
	st.info(f"Precision: {prec:.2f}% \| Recall: {rec:.2f}% \| F1: {f1:.2f}%")

	show_confusion(y_test, pred, "SVM Matrix")

	st.write("Classification Report:")
	st.text(classification_report(y_test, pred, zero_division=0))

	st.markdown('</div>', unsafe_allow_html=True)

	joblib.dump(model.best_estimator_, "svm.pkl")

	save_result("SVM", acc, target, ", ".join(features_used), {
	"Precision": prec,
	"Recall": rec,
	"F1Score": f1,
	"BestParams": str(model.best_params_),
	"OutliersClipped": len(pp_info["outliers_clipped"]),
	"LowVarRemoved": str(pp_info["low_var_removed"]),
	"HighCorrRemoved": str(pp_info["high_corr_removed"]),
	"ClassBalanced": pp_info["class_balanced"],
	})

	# LOGISTIC
	elif model_choice == "Logistic Regression":

	X, y, transformed, pp_info = preprocess_for_model(df, target)
	features_used = pp_info["features_used"]

	result_box = st.container()
	with result_box:
	st.markdown('<div class="model-result-box">', unsafe_allow_html=True)
	st.markdown(f"### Logistic Regression Results (Target: {target})")

	X_train, X_test, y_train, y_test = train_test_split(
	X, y, test_size=0.2, random_state=42, stratify=y
	)

	sc = StandardScaler()

	X_train = sc.fit_transform(X_train)
	X_test = sc.transform(X_test)

	cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

	model = GridSearchCV(
	LogisticRegression(max_iter=5000, solver="liblinear"),
	{
	"C":[0.01,0.1,1,10,100],
	"penalty":["l1","l2"]
	},
	cv=cv,
	n_jobs=-1
	)

	model.fit(X_train, y_train)

	pred = model.predict(X_test)

	acc = accuracy_score(y_test, pred)*100
	prec = precision_score(y_test, pred, average="weighted", zero_division=0)*100
	rec = recall_score(y_test, pred, average="weighted", zero_division=0)*100
	f1 = f1_score(y_test, pred, average="weighted", zero_division=0)*100

	st.success(f"Accuracy: {acc:.2f}%")
	st.info(f"Precision: {prec:.2f}% \| Recall: {rec:.2f}% \| F1: {f1:.2f}%")

	show_confusion(y_test, pred, "Logistic Regression Matrix")

	# Show coefficient magnitudes for logistic regression
	if hasattr(model.best_estimator_, "coef_"):
	coef = pd.Series(
	np.abs(model.best_estimator_.coef_[0]),
	index=X.columns
	).sort_values(ascending=False).head(8)
	compact_bar(coef.index, coef.values, "Feature Coefficients (Absolute)")

	st.write("Classification Report:")
	st.text(classification_report(y_test, pred, zero_division=0))

	st.markdown('</div>', unsafe_allow_html=True)

	joblib.dump(model.best_estimator_, "logistic.pkl")

	save_result("Logistic Regression", acc, target, ", ".join(features_used), {
	"Precision": prec,
	"Recall": rec,
	"F1Score": f1,
	"BestParams": str(model.best_params_),
	"OutliersClipped": len(pp_info["outliers_clipped"]),
	"LowVarRemoved": str(pp_info["low_var_removed"]),
	"HighCorrRemoved": str(pp_info["high_corr_removed"]),
	"ClassBalanced": pp_info["class_balanced"],
	})

	# DECISION TREE
	elif model_choice == "Decision Tree":

	X, y, transformed, pp_info = preprocess_for_model(df, target)
	features_used = pp_info["features_used"]

	result_box = st.container()
	with result_box:
	st.markdown('<div class="model-result-box">', unsafe_allow_html=True)
	st.markdown(f"### Decision Tree Results (Target: {target})")

	X_train, X_test, y_train, y_test = train_test_split(
	X, y, test_size=0.2, random_state=42, stratify=y
	)

	cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

	model = GridSearchCV(
	DecisionTreeClassifier(),
	{
	"max_depth":[3,5,10,15,None],
	"min_samples_split":[2,5,10],
	"min_samples_leaf":[1,2,4],
	"criterion":["gini","entropy"]
	},
	cv=cv,
	n_jobs=-1
	)

	model.fit(X_train, y_train)

	pred = model.predict(X_test)

	acc = accuracy_score(y_test, pred)*100
	prec = precision_score(y_test, pred, average="weighted", zero_division=0)*100
	rec = recall_score(y_test, pred, average="weighted", zero_division=0)*100
	f1 = f1_score(y_test, pred, average="weighted", zero_division=0)*100

	st.success(f"Accuracy: {acc:.2f}%")
	st.info(f"Precision: {prec:.2f}% \| Recall: {rec:.2f}% \| F1: {f1:.2f}%")

	show_confusion(y_test, pred, "Decision Tree Matrix")

	# Feature importance for decision tree
	imp = pd.Series(
	model.best_estimator_.feature_importances_,
	index=X.columns
	).sort_values(ascending=False).head(8)
	compact_bar(imp.index, imp.values, "Feature Importance")

	st.write("Classification Report:")
	st.text(classification_report(y_test, pred, zero_division=0))

	st.markdown('</div>', unsafe_allow_html=True)

	joblib.dump(model.best_estimator_, "decision_tree.pkl")

	save_result("Decision Tree", acc, target, ", ".join(features_used), {
	"Precision": prec,
	"Recall": rec,
	"F1Score": f1,
	"BestParams": str(model.best_params_),
	"OutliersClipped": len(pp_info["outliers_clipped"]),
	"LowVarRemoved": str(pp_info["low_var_removed"]),
	"HighCorrRemoved": str(pp_info["high_corr_removed"]),
	"ClassBalanced": pp_info["class_balanced"],
	})

	# KMEANS
	elif model_choice == "KMeans Clustering":

	temp = df.copy()

	for col in temp.columns:
	if temp[col].dtype == "object":
	le = LabelEncoder()
	temp[col] = le.fit_transform(temp[col].astype(str))

	X = temp.drop(columns=[target])

	# Clip outliers for clustering too
	temp_clipped, outlier_info = clip_outliers_iqr(temp)
	X_clipped = temp_clipped.drop(columns=[target])

	sc = StandardScaler()
	Xs = sc.fit_transform(X_clipped)

	# Find optimal k using elbow method
	inertias = []
	K_range = range(2, min(11, len(df) // 10 + 1))
	for k in K_range:
	km = KMeans(n_clusters=k, random_state=42, n_init=10)
	km.fit(Xs)
	inertias.append(km.inertia_)

	best_k = 3
	if len(inertias) >= 3:
	diffs = [inertias[i] - inertias[i+1] for i in range(len(inertias)-1)]
	if diffs:
	elbow_idx = np.argmax(diffs) + 1
	best_k = list(K_range)[elbow_idx] if elbow_idx < len(list(K_range)) else 3
	best_k = max(2, min(best_k, 10))

	result_box = st.container()
	with result_box:
	st.markdown('<div class="model-result-box">', unsafe_allow_html=True)
	st.markdown(f"### KMeans Clustering Results (Target: {target})")

	model = KMeans(n_clusters=best_k, random_state=42, n_init=10)

	cluster = model.fit_predict(Xs)

	score = silhouette_score(Xs, cluster)*100

	st.success(f"Cluster Quality Score: {score:.2f}% (k={best_k})")

	fig, ax = plt.subplots(figsize=(6,4))
	plt.scatter(Xs[:,0], Xs[:,1], c=cluster, cmap="viridis")
	plt.title(f"Clusters (k={best_k})")
	col1, col2, col3 = st.columns([1, 2, 1])
	with col2:
	st.pyplot(fig)

	# Elbow plot
	fig2, ax2 = plt.subplots(figsize=(6,3))
	plt.plot(list(K_range), inertias, "bo-")
	plt.xlabel("Number of Clusters (k)")
	plt.ylabel("Inertia")
	plt.title("Elbow Method")
	col1, col2, col3 = st.columns([1, 2, 1])
	with col2:
	st.pyplot(fig2)

	# Cluster distribution
	cluster_counts = pd.Series(cluster).value_counts().sort_index()
	fig3, ax3 = plt.subplots(figsize=(6,3))
	sns.barplot(x=cluster_counts.index, y=cluster_counts.values)
	plt.xlabel("Cluster")
	plt.ylabel("Count")
	plt.title("Cluster Distribution")
	col1, col2, col3 = st.columns([1, 2, 1])
	with col2:
	st.pyplot(fig3)

	st.markdown('</div>', unsafe_allow_html=True)

	joblib.dump(model, "kmeans.pkl")

	save_result("KMeans Clustering", score, target, ", ".join(X_clipped.columns), {
	"BestK": best_k,
	"OutliersClipped": len(outlier_info),
	})

	# ==========================================================
	# DOWNLOAD SECTION
	# ==========================================================
	if st.session_state.last_model_name:

	st.markdown('<div class="section">⬇ Downloads</div>', unsafe_allow_html=True)

	file_map = {
	"Random Forest":"random_forest.pkl",
	"SVM":"svm.pkl",
	"Logistic Regression":"logistic.pkl",
	"Decision Tree":"decision_tree.pkl",
	"KMeans Clustering":"kmeans.pkl"
	}

	current = file_map[st.session_state.last_model_name]

	if os.path.exists(current):

	with open(current, "rb") as f:
	st.download_button(
	label=f"Download {st.session_state.last_model_name} (Deploy Ready)",
	data=f,
	file_name=current,
	mime="application/octet-stream"
	)

	# ==========================================================
	# HISTORY + REPORTS
	# ==========================================================
	if len(st.session_state.history) > 0:

	st.markdown('<div class="section">📊 History</div>', unsafe_allow_html=True)

	hist = pd.DataFrame(st.session_state.history)

	st.dataframe(hist, use_container_width=True)

	fig, ax = plt.subplots(figsize=(6,3))
	sns.barplot(data=hist, x="Model", y="Score")
	plt.xticks(rotation=20)
	plt.title("All Applied Models")
	col1, col2, col3 = st.columns([1, 2, 1])
	with col2:
	st.pyplot(fig)

	# CSV
	csv_buffer = io.StringIO()
	hist.to_csv(csv_buffer, index=False)

	st.download_button(
	"Download Results CSV",
	csv_buffer.getvalue(),
	"results.csv"
	)

	# TXT report
	if st.session_state.cleaned_df is not None and len(st.session_state.model_results) > 0:
	report_text = generate_text_report(
	st.session_state.cleaned_df,
	st.session_state.selected_target or "unknown",
	st.session_state.model_results
	)

	st.download_button(
	"Download Full Report (TXT)",
	report_text,
	"full_report.txt",
	mime="text/plain"
	)

	# XLSX report
	try:
	xlsx_data = generate_xlsx_report(
	st.session_state.cleaned_df,
	st.session_state.selected_target or "unknown",
	st.session_state.model_results
	)
	st.download_button(
	"Download Full Report (XLSX)",
	data=xlsx_data.getvalue(),
	file_name="full_report.xlsx",
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	)
	except Exception:
	pass

	# ==========================================================
	# RESET
	# ==========================================================
	st.markdown('<div class="section">♻ Reset</div>', unsafe_allow_html=True)

	if st.button("Clear History"):

	st.session_state.history = []
	st.session_state.last_model_name = None
	st.session_state.last_score = None
	st.session_state.model_results = []
	st.session_state.selected_target = None
	st.session_state.cleaned_df = None

	st.success("History Cleared")