import streamlit as st
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.exceptions import ConvergenceWarning
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Page config
st.set_page_config(page_title="Explore Logistic Regression", layout="wide")
st.title("Logistic Regression Classifier")

# Load the Wine dataset once and cache it across reruns
@st.cache_data
def load_data():
    wine = load_wine()
    df = pd.DataFrame(wine.data, columns=wine.feature_names)
    df["target"] = wine.target
    return df, wine

df, wine = load_data()

# Show data preview
st.markdown("### 📋 Data Preview")
st.dataframe(df.head(), use_container_width=True)

# Sidebar settings: pick a solver compatible with the chosen penalty
# (liblinear supports l1, lbfgs supports l2, and only saga supports elasticnet)
st.sidebar.header("⚙️ Model Settings")
penalty = st.sidebar.radio("Penalty Type", ["l1", "l2", "elasticnet"])
C = st.sidebar.slider("Inverse Regularization Strength (C)", 0.01, 10.0, 1.0)
l1_ratio = None
if penalty == "elasticnet":
    solver = "saga"
    l1_ratio = st.sidebar.slider("ElasticNet Mixing Ratio (l1_ratio)", 0.0, 1.0, 0.5)
elif penalty == "l1":
    solver = "liblinear"
else:
    solver = "lbfgs"

# Prepare data: split first, then fit the scaler on the training set only,
# so no information from the test set leaks into the scaling
X = df.drop("target", axis=1)
y = df["target"]
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Cache model training; the cache key is derived from the arguments,
# so changing the sidebar settings triggers a fresh fit
@st.cache_resource
def train_model(X_train, y_train, model_params):
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        model = LogisticRegression(**model_params)
        model.fit(X_train, y_train)
    return model

# Train model on button click
if st.sidebar.button("Train Model"):
    with st.spinner("Training the model..."):
        # The deprecated `multi_class` argument is omitted; recent scikit-learn
        # versions handle multiclass targets automatically
        model_params = {
            "penalty": penalty,
            "C": C,
            "solver": solver,
            "max_iter": 200,
        }
        if penalty == "elasticnet":
            model_params["l1_ratio"] = l1_ratio
        model = train_model(X_train, y_train, model_params)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
    st.success(f"✅ Model Accuracy: {accuracy * 100:.2f}%")
    st.markdown("### 📊 Classification Report")
    st.text(classification_report(y_test, y_pred, target_names=wine.target_names))

# Visualization
st.markdown("## 🎨 Visualizing Decision Boundary (2 Features Only)")
feature_x = st.selectbox("X-axis Feature", df.columns[:-1], index=0)
feature_y = st.selectbox("Y-axis Feature", df.columns[:-1], index=1)

# Use a fresh scaler here so the one fitted above on the full feature set
# is not silently refitted (this section is illustrative, scaled on all rows)
X_vis = df[[feature_x, feature_y]]
X_vis_scaled = StandardScaler().fit_transform(X_vis)
X_train_v, X_test_v, y_train_v, y_test_v = train_test_split(X_vis_scaled, y, test_size=0.2, random_state=42)

# Train a simplified model on just the two selected features
model_vis = LogisticRegression(penalty="l2", C=1.0, solver="lbfgs", max_iter=200)
model_vis.fit(X_train_v, y_train_v)

# Predict the class for every point on a coarse mesh over the feature plane
h = 0.2  # larger step for speed
x_min, x_max = X_vis_scaled[:, 0].min() - 1, X_vis_scaled[:, 0].max() + 1
y_min, y_max = X_vis_scaled[:, 1].min() - 1, X_vis_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = model_vis.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

fig, ax = plt.subplots(figsize=(8, 6))
ax.contourf(xx, yy, Z, alpha=0.3)
sns.scatterplot(x=X_vis_scaled[:, 0], y=X_vis_scaled[:, 1], hue=df["target"], palette="Set1", ax=ax)
ax.set_xlabel(f"{feature_x} (scaled)")
ax.set_ylabel(f"{feature_y} (scaled)")
ax.set_title("Decision Boundary")
st.pyplot(fig)
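# --- Optional illustration: why `l1` encourages sparsity ---
# A minimal sketch added for illustration, not part of the core app: it fits a
# throwaway L1-penalized model on the same training split and reports how many
# coefficients are driven to exactly zero. The C=0.1 value is an arbitrary
# choice to make the effect visible; adjust or remove this block freely.
with st.expander("ℹ️ Demo: `l1` and coefficient sparsity"):
    demo_model = LogisticRegression(penalty="l1", C=0.1, solver="liblinear", max_iter=200)
    demo_model.fit(X_train, y_train)
    zero_frac = float(np.mean(demo_model.coef_ == 0))
    st.write(f"With penalty='l1' and C=0.1, {zero_frac:.0%} of the coefficients are exactly zero.")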
st.markdown(""" --- ## ✅ Summary - Logistic Regression is efficient and interpretable. - `l2` is default; `l1` helps with sparsity. - `elasticnet` is a hybrid of both. - Use the sidebar to explore hyperparameters interactively! """)