# Benchmark-Kit-26 / src/benchmarking.py
# Author: dwmk — "Update src/benchmarking.py" (commit bca40b9, verified)
# benchmarking.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
def run_benchmarking():
    """Render the model-benchmarking tab of the Streamlit app.

    Trains the user-selected classifiers on the preprocessed dataframe held
    in session state and displays per-model metrics, confusion matrices,
    classification reports, a leaderboard, and a comparison chart.

    Reads from st.session_state: processed_df, target_col, feature_cols
    (all expected to be set by the EDA tab). Renders widgets only; returns
    None early when the target/feature selection is missing.
    """
    st.header("βš–οΈ Professional Model Benchmarking")

    df = st.session_state.processed_df
    target_col = st.session_state.target_col
    feature_cols = st.session_state.feature_cols

    # Validation: both a target and at least one feature must be chosen first.
    if not target_col or target_col == "None":
        st.error("⚠️ Please select a Target variable in the EDA tab.")
        return
    if not feature_cols:
        st.error("⚠️ Please select Feature variables in the EDA tab.")
        return

    # Data preparation
    X = df[feature_cols]
    y = df[target_col]

    # Encode target labels to integers; keep readable names for the plots.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    class_names = [str(c) for c in le.classes_]

    # Train/test split. Stratify so every class keeps its proportion in the
    # test set (otherwise rare classes can vanish from evaluation); stratify
    # is only valid when every class has at least 2 members.
    test_size = st.slider("Test Split Size", 0.1, 0.5, 0.2)
    stratify_on = y_encoded if pd.Series(y_encoded).value_counts().min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=test_size, random_state=42, stratify=stratify_on
    )

    # ---------------- Model Configuration ----------------
    # Fixed random_state so repeated benchmark runs produce comparable scores.
    available_models = {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(probability=True, random_state=42),
    }
    selected_models = st.multiselect(
        "Select Models to Benchmark",
        list(available_models.keys()),
        default=["Random Forest"],
    )

    if st.button("πŸš€ Run Benchmark"):
        # Guard: with no selection the leaderboard sort below would raise
        # KeyError on the missing "F1 Score (Weighted)" column.
        if not selected_models:
            st.error("⚠️ Please select at least one model to benchmark.")
            return

        results_list = []

        # Preprocessing pipeline: numeric -> scale, categorical -> one-hot.
        num_cols = X.select_dtypes(include=np.number).columns
        cat_cols = X.select_dtypes(exclude=np.number).columns
        transformers = []
        if len(num_cols) > 0:
            transformers.append(('num', StandardScaler(), num_cols))
        if len(cat_cols) > 0:
            transformers.append(('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols))
        preprocessor = ColumnTransformer(transformers=transformers)

        st.markdown("### πŸ† Results")
        for name in selected_models:
            with st.status(f"Training {name}...", expanded=True) as status:
                # Fresh pipeline per model so preprocessing is re-fit each run.
                clf = available_models[name]
                model_pipeline = Pipeline([
                    ('preprocessor', preprocessor),
                    ('classifier', clf)
                ])

                # Train and predict
                model_pipeline.fit(X_train, y_train)
                y_pred = model_pipeline.predict(X_test)

                # Metrics — weighted F1 accounts for class imbalance in
                # multiclass targets.
                acc = accuracy_score(y_test, y_pred)
                f1 = f1_score(y_test, y_pred, average="weighted")
                results_list.append({
                    "Model": name,
                    "Accuracy": acc,
                    "F1 Score (Weighted)": f1
                })
                status.write(f"Accuracy: {acc:.4f}")
                status.update(label=f"{name} Finished", state="complete")

            # Detailed analysis per model (expander)
            with st.expander(f"πŸ” Details: {name}"):
                c1, c2 = st.columns(2)

                # Confusion matrix heatmap
                cm = confusion_matrix(y_test, y_pred)
                fig_cm = px.imshow(cm, text_auto=True,
                                   x=class_names, y=class_names,
                                   labels=dict(x="Predicted", y="Actual"),
                                   title=f"Confusion Matrix ({name})",
                                   color_continuous_scale="Blues")
                c1.plotly_chart(fig_cm, use_container_width=True)

                # Per-class precision/recall/F1 table
                report = classification_report(y_test, y_pred, target_names=class_names, output_dict=True)
                df_report = pd.DataFrame(report).transpose()
                c2.dataframe(df_report.style.background_gradient(cmap="Greens", subset=["f1-score"]))

        # Summary leaderboard, best weighted-F1 first.
        st.subheader("🏁 Leaderboard")
        res_df = pd.DataFrame(results_list).sort_values(by="F1 Score (Weighted)", ascending=False)
        st.dataframe(res_df.style.highlight_max(axis=0, color="lightgreen"), use_container_width=True)

        # Comparison chart across models.
        fig_bench = px.bar(res_df, x="Model", y="F1 Score (Weighted)",
                           color="Accuracy", title="Model Performance Comparison",
                           range_y=[0, 1])
        st.plotly_chart(fig_bench, use_container_width=True)