Spaces:
Sleeping
Sleeping
| # benchmarking.py | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import plotly.express as px | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.svm import SVC | |
def run_benchmarking():
    """Render the model-benchmarking tab.

    Reads the processed dataframe, target column, and feature columns from
    ``st.session_state`` (populated by the EDA tab), lets the user pick a
    test-split size and one or more sklearn classifiers, then trains each
    model inside a shared preprocessing pipeline and reports accuracy,
    weighted F1, a confusion matrix, a per-class report, and a leaderboard.

    Returns:
        None. All output is rendered through Streamlit widgets.
    """
    st.header("βοΈ Professional Model Benchmarking")

    df = st.session_state.processed_df
    target_col = st.session_state.target_col
    feature_cols = st.session_state.feature_cols

    # Validation: both a target and at least one feature must be chosen
    # in the EDA tab before benchmarking makes sense.
    if not target_col or target_col == "None":
        st.error("β οΈ Please select a Target variable in the EDA tab.")
        return
    if not feature_cols:
        st.error("β οΈ Please select Feature variables in the EDA tab.")
        return

    # Data preparation.
    X = df[feature_cols]
    y = df[target_col]
    if len(y) == 0:
        st.error("The processed dataset is empty — nothing to benchmark.")
        return

    # Encode target labels to 0..k-1 integers; keep human-readable class
    # names for the confusion matrix and classification report.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    class_names = [str(c) for c in le.classes_]

    # Train/test split.
    test_size = st.slider("Test Split Size", 0.1, 0.5, 0.2)
    # Fix: stratify the split so class proportions are preserved in both
    # partitions (the original unstratified split skews metrics on
    # imbalanced targets). Stratification needs >= 2 samples per class;
    # fall back to a plain random split otherwise.
    class_counts = np.bincount(y_encoded)
    stratify = y_encoded if class_counts.min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=test_size, random_state=42, stratify=stratify
    )

    # ---------------- Model Configuration ----------------
    available_models = {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(n_estimators=100),
        "SVM": SVC(probability=True),
    }
    selected_models = st.multiselect(
        "Select Models to Benchmark",
        list(available_models.keys()),
        default=["Random Forest"],
    )

    if st.button("π Run Benchmark"):
        # Fix: with no models selected the original code crashed on
        # sort_values over an empty DataFrame — bail out early instead.
        if not selected_models:
            st.warning("Please select at least one model to benchmark.")
            return

        results_list = []

        # Shared preprocessing: scale numeric columns, one-hot encode
        # categoricals (unknown categories at predict time are ignored).
        num_cols = X.select_dtypes(include=np.number).columns
        cat_cols = X.select_dtypes(exclude=np.number).columns
        transformers = []
        if len(num_cols) > 0:
            transformers.append(('num', StandardScaler(), num_cols))
        if len(cat_cols) > 0:
            transformers.append(
                ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols)
            )
        preprocessor = ColumnTransformer(transformers=transformers)

        st.markdown("### π Results")
        for name in selected_models:
            with st.status(f"Training {name}...", expanded=True) as status:
                # Build one pipeline per model so preprocessing is fit
                # on the training fold only (no test-set leakage).
                clf = available_models[name]
                model_pipeline = Pipeline([
                    ('preprocessor', preprocessor),
                    ('classifier', clf),
                ])

                # Train and predict.
                model_pipeline.fit(X_train, y_train)
                y_pred = model_pipeline.predict(X_test)

                # Metrics: weighted F1 handles class imbalance and
                # multiclass targets uniformly.
                acc = accuracy_score(y_test, y_pred)
                f1 = f1_score(y_test, y_pred, average="weighted")
                results_list.append({
                    "Model": name,
                    "Accuracy": acc,
                    "F1 Score (Weighted)": f1,
                })
                status.write(f"Accuracy: {acc:.4f}")
                status.update(label=f"{name} Finished", state="complete")

            # Detailed per-model analysis.
            with st.expander(f"π Details: {name}"):
                c1, c2 = st.columns(2)

                # Confusion matrix heatmap.
                cm = confusion_matrix(y_test, y_pred)
                fig_cm = px.imshow(
                    cm, text_auto=True,
                    x=class_names, y=class_names,
                    labels=dict(x="Predicted", y="Actual"),
                    title=f"Confusion Matrix ({name})",
                    color_continuous_scale="Blues",
                )
                c1.plotly_chart(fig_cm, use_container_width=True)

                # Per-class precision/recall/F1 table.
                report = classification_report(
                    y_test, y_pred, target_names=class_names, output_dict=True
                )
                df_report = pd.DataFrame(report).transpose()
                c2.dataframe(
                    df_report.style.background_gradient(cmap="Greens", subset=["f1-score"])
                )

        # Summary leaderboard, best weighted-F1 first.
        st.subheader("π Leaderboard")
        res_df = pd.DataFrame(results_list).sort_values(
            by="F1 Score (Weighted)", ascending=False
        )
        st.dataframe(
            res_df.style.highlight_max(axis=0, color="lightgreen"),
            use_container_width=True,
        )

        # Comparison chart across models.
        fig_bench = px.bar(
            res_df, x="Model", y="F1 Score (Weighted)",
            color="Accuracy", title="Model Performance Comparison",
            range_y=[0, 1],
        )
        st.plotly_chart(fig_bench, use_container_width=True)