File size: 4,206 Bytes
bb5ca83
 
 
ec70e52
798b769
ec70e52
bb5ca83
ec70e52
8777031
798b769
ec70e52
8777031
bb5ca83
8777031
ec70e52
e36c883
bb5ca83
8777031
f3c736b
 
 
 
 
 
 
 
 
8777031
ec70e52
bb5ca83
 
8777031
ec70e52
f3c736b
 
ec70e52
 
f3c736b
 
ec70e52
f3c736b
 
ec70e52
f3c736b
ec70e52
f3c736b
ec70e52
 
bb5ca83
 
ec70e52
 
8777031
 
ab3a980
8777031
 
ab3a980
8777031
 
 
ab3a980
8777031
ab3a980
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb5ca83
 
ec70e52
8777031
 
 
 
5557523
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.exceptions import ConvergenceWarning
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Page setup: configure the page (title, wide layout), then show the app heading
st.set_page_config(
    layout="wide",
    page_title="Explore Logistic Regression",
)
st.title("Logistic Regression Classifier")

# Dataset loader, wrapped in Streamlit's data cache
@st.cache_data
def load_data():
    """Load the scikit-learn wine dataset as a DataFrame.

    Returns:
        A ``(df, wine)`` tuple. ``df`` holds one column per feature name plus
        a trailing ``target`` column; ``wine`` is the object returned by
        ``load_wine()``.
    """
    dataset = load_wine()
    frame = pd.DataFrame(dataset.data, columns=dataset.feature_names).assign(
        target=dataset.target
    )
    return frame, dataset

df, wine = load_data()

# Show data preview (only the rows returned by df.head()).
# The heading emoji was stored as mojibake (UTF-8 bytes decoded with the wrong
# code page); it is restored to the intended clipboard character.
st.markdown("### 📋 Data Preview")
st.dataframe(df.head(), use_container_width=True)

# Sidebar settings
# The header emoji was stored as mojibake; restored to the intended gear symbol.
st.sidebar.header("⚙️ Model Settings")
penalty = st.sidebar.radio("Penalty Type", ["l1", "l2", "elasticnet"])
C = st.sidebar.slider("Inverse Regularization Strength (C)", 0.01, 10.0, 1.0)

# Pick a solver that supports the chosen penalty: "saga" for elasticnet,
# "liblinear" for l1, and "lbfgs" for the remaining option (l2).
# l1_ratio stays None unless elasticnet is selected, in which case the user
# chooses the L1/L2 mixing ratio with an extra slider.
l1_ratio = None
if penalty == "elasticnet":
    solver = "saga"
    l1_ratio = st.sidebar.slider("ElasticNet Mixing Ratio (l1_ratio)", 0.0, 1.0, 0.5)
elif penalty == "l1":
    solver = "liblinear"
else:
    solver = "lbfgs"

# Prepare data
X = df.drop("target", axis=1)
y = df["target"]

# Split BEFORE scaling: the scaler must learn its mean/std from the training
# rows only. Fitting it on the full matrix first lets test-set statistics leak
# into training and makes the reported accuracy optimistic. The split itself
# depends only on the row count and random_state, so the same rows land in the
# train/test partitions as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix in the same (train-fitted) standardized space.
X_scaled = scaler.transform(X)

# Model fitting, wrapped in Streamlit's resource cache
@st.cache_resource
def train_model(X_train, y_train, model_params):
    """Build a LogisticRegression from ``model_params`` and fit it.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        model_params: Keyword arguments forwarded to ``LogisticRegression``.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    classifier = LogisticRegression(**model_params)
    # ConvergenceWarning is muted only for the duration of the fit; the
    # previous warning filters are restored when the context exits.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        classifier.fit(X_train, y_train)
    return classifier

# Train model on button click.
#
# st.button() is True only during the rerun caused by the click, and every
# later widget interaction (such as the feature selectboxes below) reruns the
# script with the button back at False. With the results nested directly under
# the button they vanished as soon as a selectbox was touched. The
# hyperparameters captured at click time are therefore stored in
# st.session_state, and the results are rendered whenever such a snapshot
# exists, so they stay on screen until the next click.
if st.sidebar.button("Train Model"):
    model_params = {
        "penalty": penalty,
        "C": C,
        "solver": solver,
        "max_iter": 200,
        # NOTE(review): recent scikit-learn releases deprecate `multi_class`;
        # confirm against the pinned version before upgrading.
        "multi_class": "ovr",
    }
    if penalty == "elasticnet":
        model_params["l1_ratio"] = l1_ratio
    st.session_state["trained_model_params"] = model_params

if "trained_model_params" in st.session_state:
    with st.spinner("Training the model..."):
        # train_model is decorated with st.cache_resource, so reruns with
        # unchanged inputs are expected to reuse the already fitted model.
        model = train_model(X_train, y_train, st.session_state["trained_model_params"])
        y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    st.success(f"✅ Model Accuracy: {accuracy * 100:.2f}%")

    st.markdown("### 📊 Classification Report")
    st.text(classification_report(y_test, y_pred, target_names=wine.target_names))

    # Visualization
    st.markdown("## 🎨 Visualizing Decision Boundary (2 Features Only)")
    feature_names = df.columns[:-1]  # every column except the trailing "target"
    feature_x = st.selectbox("X-axis Feature", feature_names, index=0)
    feature_y = st.selectbox("Y-axis Feature", feature_names, index=1)

    # Use a dedicated scaler so the module-level `scaler` (fitted on all
    # features) is not silently refitted on two columns. As with the main
    # model, it is fitted on the training rows only and then applied to every
    # row for plotting.
    X_vis = df[[feature_x, feature_y]].to_numpy()
    X_train_v, _, y_train_v, _ = train_test_split(
        X_vis, y, test_size=0.2, random_state=42
    )
    vis_scaler = StandardScaler()
    X_train_v = vis_scaler.fit_transform(X_train_v)
    X_vis_scaled = vis_scaler.transform(X_vis)

    # Train visualization model (simplified: fixed l2 / C=1.0, independent of
    # the sidebar settings)
    model_vis = LogisticRegression(penalty="l2", C=1.0, solver="lbfgs", max_iter=200, multi_class="ovr")
    model_vis.fit(X_train_v, y_train_v)

    # Evaluate the classifier on a regular grid covering the data plus a
    # margin of 1 on each side (in standardized units).
    h = 0.2  # larger step for speed
    x_min, x_max = X_vis_scaled[:, 0].min() - 1, X_vis_scaled[:, 0].max() + 1
    y_min, y_max = X_vis_scaled[:, 1].min() - 1, X_vis_scaled[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = model_vis.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Draw through the explicit Axes object rather than pyplot's implicit
    # "current figure" state, and close the figure once rendered so figures do
    # not pile up across reruns.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.contourf(xx, yy, Z, alpha=0.3)
    sns.scatterplot(x=X_vis_scaled[:, 0], y=X_vis_scaled[:, 1], hue=df["target"], palette="Set1", ax=ax)
    ax.set_xlabel(feature_x)
    ax.set_ylabel(feature_y)
    ax.set_title("Decision Boundary")
    st.pyplot(fig)
    plt.close(fig)

# Summary (rendered on every run, independent of training).
# The heading emoji was stored as mojibake; restored to the intended check mark.
st.markdown("""
---
## ✅ Summary
- Logistic Regression is efficient and interpretable.
- `l2` is default; `l1` helps with sparsity.
- `elasticnet` is a hybrid of both.
- Use the sidebar to explore hyperparameters interactively!
""")