File size: 5,165 Bytes
ea419b1
 
 
bca40b9
 
ea419b1
bca40b9
 
 
 
ea419b1
 
 
 
 
bca40b9
ea419b1
 
bca40b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# benchmarking.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

def run_benchmarking():
    """Render the model-benchmarking tab.

    Reads the processed dataframe, target column and feature columns from
    ``st.session_state`` (populated by the EDA tab), lets the user pick one
    or more classifiers, trains each inside a shared preprocessing pipeline
    (scale numeric / one-hot categorical), and reports accuracy, weighted F1,
    a confusion matrix and a classification report per model, plus a
    leaderboard and comparison chart.
    """
    st.header("βš–οΈ Professional Model Benchmarking")

    # Guard against missing session state (this tab opened before the EDA
    # tab populated it) — attribute access would raise AttributeError.
    df = st.session_state.get("processed_df")
    target_col = st.session_state.get("target_col")
    feature_cols = st.session_state.get("feature_cols")

    # Validation
    if df is None:
        st.error("⚠️ No processed dataset found. Please load data in the EDA tab.")
        return
    if not target_col or target_col == "None":
        st.error("⚠️ Please select a Target variable in the EDA tab.")
        return
    if not feature_cols:
        st.error("⚠️ Please select Feature variables in the EDA tab.")
        return

    # Data Preparation
    X = df[feature_cols]
    y = df[target_col]

    # Encode Target so every classifier sees contiguous integer labels.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    class_names = [str(c) for c in le.classes_]
    n_classes = len(class_names)

    # Train/Test Split. Stratify when every class has at least 2 samples so
    # the test set keeps the class proportions; otherwise fall back to a
    # plain random split (stratification would raise on singleton classes).
    test_size = st.slider("Test Split Size", 0.1, 0.5, 0.2)
    stratify = y_encoded if np.min(np.bincount(y_encoded)) >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=test_size, random_state=42, stratify=stratify
    )

    # ---------------- Model Configuration ----------------
    available_models = {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(n_estimators=100),
        "SVM": SVC(probability=True)
    }

    selected_models = st.multiselect("Select Models to Benchmark", list(available_models.keys()), default=["Random Forest"])

    if st.button("πŸš€ Run Benchmark"):
        # An empty selection would later crash the leaderboard sort_values.
        if not selected_models:
            st.warning("Please select at least one model to benchmark.")
            return

        results_list = []

        # Define Preprocessing Pipeline
        # Numeric -> Scale, Categorical -> OneHot
        num_cols = X.select_dtypes(include=np.number).columns
        cat_cols = X.select_dtypes(exclude=np.number).columns

        transformers = []
        if len(num_cols) > 0:
            transformers.append(('num', StandardScaler(), num_cols))
        if len(cat_cols) > 0:
            transformers.append(('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols))

        preprocessor = ColumnTransformer(transformers=transformers)

        st.markdown("### πŸ† Results")

        # Fixed label order so the confusion-matrix axes and the report rows
        # always line up with class_names, even when the test split happens
        # to miss a class entirely.
        all_labels = np.arange(n_classes)

        for name in selected_models:
            with st.status(f"Training {name}...", expanded=True) as status:
                # Build Pipeline: shared preprocessing + this classifier.
                clf = available_models[name]
                model_pipeline = Pipeline([
                    ('preprocessor', preprocessor),
                    ('classifier', clf)
                ])

                # Train
                model_pipeline.fit(X_train, y_train)
                y_pred = model_pipeline.predict(X_test)

                # Metrics (weighted F1 handles class imbalance).
                acc = accuracy_score(y_test, y_pred)
                f1 = f1_score(y_test, y_pred, average="weighted")

                results_list.append({
                    "Model": name,
                    "Accuracy": acc,
                    "F1 Score (Weighted)": f1
                })

                status.write(f"Accuracy: {acc:.4f}")
                status.update(label=f"{name} Finished", state="complete")

                # Detailed Analysis (Expander)
                with st.expander(f"πŸ” Details: {name}"):
                    c1, c2 = st.columns(2)

                    # Confusion Matrix — labels= pins the matrix shape to
                    # n_classes so x/y tick names always match.
                    cm = confusion_matrix(y_test, y_pred, labels=all_labels)
                    fig_cm = px.imshow(cm, text_auto=True,
                                       x=class_names, y=class_names,
                                       labels=dict(x="Predicted", y="Actual"),
                                       title=f"Confusion Matrix ({name})",
                                       color_continuous_scale="Blues")
                    c1.plotly_chart(fig_cm, use_container_width=True)

                    # Classification Report — zero_division=0 avoids warnings
                    # for classes absent from y_pred.
                    report = classification_report(
                        y_test, y_pred, labels=all_labels,
                        target_names=class_names, output_dict=True,
                        zero_division=0
                    )
                    df_report = pd.DataFrame(report).transpose()
                    c2.dataframe(df_report.style.background_gradient(cmap="Greens", subset=["f1-score"]))

        # Summary Table
        st.subheader("🏁 Leaderboard")
        res_df = pd.DataFrame(results_list).sort_values(by="F1 Score (Weighted)", ascending=False)
        st.dataframe(res_df.style.highlight_max(axis=0, color="lightgreen"), use_container_width=True)

        # Comparison Chart
        fig_bench = px.bar(res_df, x="Model", y="F1 Score (Weighted)",
                           color="Accuracy", title="Model Performance Comparison",
                           range_y=[0, 1])
        st.plotly_chart(fig_bench, use_container_width=True)