UmaKumpatla committed on
Commit
f3c736b
·
verified ·
1 Parent(s): 5ce355a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -66
app.py CHANGED
@@ -9,125 +9,112 @@ from sklearn.metrics import classification_report, accuracy_score
9
  import matplotlib.pyplot as plt
10
  import seaborn as sns
11
 
12
- # Streamlit Page Configuration
13
  st.set_page_config(page_title="Explore Logistic Regression", layout="wide")
14
  st.title("Logistic Regression Classifier")
15
 
 
 
 
 
 
 
 
 
 
 
16
  # Introduction
17
  st.markdown("""
18
  ## 🧠 What is Logistic Regression?
 
19
 
20
- Logistic Regression is a widely used classification technique that models the probability of class membership.
21
- It’s particularly useful when the output is categorical (e.g., types of wines πŸ‡).
22
-
23
  ---
24
-
25
- ## πŸ“¦ Dataset: Wine Classification🍷
26
-
27
- We'll be using the Wine dataset, which contains chemical analysis of wines grown in the same region in Italy, but derived from three different cultivars.
28
  """)
29
 
30
- # Load and preview Wine dataset
31
- wine = load_wine()
32
- df = pd.DataFrame(wine.data, columns=wine.feature_names)
33
- df['target'] = wine.target
34
-
35
  st.markdown("### πŸ“‹ Data Preview")
36
  st.dataframe(df.head(), use_container_width=True)
37
 
38
- # Sidebar: Model Settings
39
  st.sidebar.header("βš™οΈ Model Settings")
40
- penalty = st.sidebar.radio("Penalty Type (Regularization)", ["l1", "l2", "elasticnet"])
41
- C = st.sidebar.slider("Inverse Regularization Strength (C)", 0.01, 10.0, value=1.0)
42
 
43
- # Determine solver and extra params based on penalty
44
  l1_ratio = None
45
-
46
- if penalty == 'elasticnet':
47
- solver = 'saga'
48
  l1_ratio = st.sidebar.slider("ElasticNet Mixing Ratio (l1_ratio)", 0.0, 1.0, 0.5)
49
- elif penalty == 'l1':
50
- solver = 'liblinear' # saga also works for l1
51
  else:
52
- solver = 'lbfgs'
53
 
54
- # Prepare features and target
55
  X = df.drop("target", axis=1)
56
  y = df["target"]
57
 
58
- # Feature scaling
59
  scaler = StandardScaler()
60
  X_scaled = scaler.fit_transform(X)
61
-
62
- # Train-test split
63
  X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
64
 
65
- # Train the Logistic Regression model
66
- model = LogisticRegression(
67
- penalty=penalty,
68
- C=C,
69
- solver=solver,
70
- multi_class='ovr',
71
- max_iter=200,
72
- l1_ratio=l1_ratio # Ignored if not used
73
- )
74
- model.fit(X_train, y_train)
75
- y_pred = model.predict(X_test)
76
-
77
- # Accuracy and classification report
 
78
  accuracy = accuracy_score(y_test, y_pred)
79
  st.success(f"βœ… Model Accuracy: {accuracy * 100:.2f}%")
80
 
81
  st.markdown("### πŸ“Š Classification Report")
82
  st.text(classification_report(y_test, y_pred, target_names=wine.target_names))
83
 
84
- # Visualization Section
85
  st.markdown("## 🎨 Visualizing the Decision Boundary (2 Features Only)")
86
-
87
- feature_x = st.selectbox("Select X-axis Feature", df.columns[:-1], index=0)
88
- feature_y = st.selectbox("Select Y-axis Feature", df.columns[:-1], index=1)
89
 
90
  X_vis = df[[feature_x, feature_y]]
91
  X_vis_scaled = scaler.fit_transform(X_vis)
92
-
93
  X_train_v, X_test_v, y_train_v, y_test_v = train_test_split(X_vis_scaled, y, test_size=0.2, random_state=42)
94
 
 
95
  model_vis = LogisticRegression(
96
- penalty=penalty,
97
- C=C,
98
- solver=solver,
99
- multi_class='ovr',
100
- max_iter=200,
101
- l1_ratio=l1_ratio
102
  )
103
  model_vis.fit(X_train_v, y_train_v)
104
 
105
- # Meshgrid for decision boundary
106
- h = .02
107
  x_min, x_max = X_vis_scaled[:, 0].min() - 1, X_vis_scaled[:, 0].max() + 1
108
  y_min, y_max = X_vis_scaled[:, 1].min() - 1, X_vis_scaled[:, 1].max() + 1
109
  xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
110
- Z = model_vis.predict(np.c_[xx.ravel(), yy.ravel()])
111
- Z = Z.reshape(xx.shape)
112
 
113
  fig, ax = plt.subplots(figsize=(8, 6))
114
  plt.contourf(xx, yy, Z, alpha=0.3)
115
- sns.scatterplot(x=X_vis_scaled[:, 0], y=X_vis_scaled[:, 1], hue=df['target'], palette='Set1', ax=ax)
116
  plt.xlabel(feature_x)
117
  plt.ylabel(feature_y)
118
- plt.title("Decision Boundaries using Logistic Regression")
119
  st.pyplot(fig)
120
 
121
- # Closing Notes
122
  st.markdown("""
123
  ---
124
-
125
  ## βœ… Summary
126
-
127
- - **Logistic Regression** is great for interpretable, fast classification.
128
- - `l2` penalty works well for most tasks.
129
- - `l1` encourages sparsity (feature selection).
130
- - `elasticnet` balances both `l1` and `l2`.
131
-
132
- 🎯 *Tip:* Try different penalties and mixing ratios to understand their effect on model performance and interpretability.
133
  """)
 
9
import matplotlib.pyplot as plt
import seaborn as sns

# Page Configuration
# NOTE: st.set_page_config must run before any other st.* call (Streamlit requirement).
st.set_page_config(page_title="Explore Logistic Regression", layout="wide")
st.title("Logistic Regression Classifier")
15
 
16
# Cache the dataset so it is fetched once per session instead of on every rerun.
@st.cache_data
def load_data():
    """Return the sklearn wine data as (DataFrame with a 'target' column, raw Bunch)."""
    bunch = load_wine()
    frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    frame["target"] = bunch.target
    return frame, bunch

df, wine = load_data()
25
+
26
# Introduction: static explainer text rendered as markdown
st.markdown("""
## 🧠 What is Logistic Regression?
Logistic Regression models the probability of class membership, especially for categorical outputs.

## 📦 Dataset: Wine Classification 🍷
We’ll use chemical analysis of wines from 3 cultivars.
---
""")

# Show dataset: first five rows of the cached wine DataFrame
st.markdown("### 📋 Data Preview")
st.dataframe(df.head(), use_container_width=True)
39
 
40
# Sidebar Settings
st.sidebar.header("⚙️ Model Settings")
penalty = st.sidebar.radio("Penalty Type", ["l1", "l2", "elasticnet"])
C = st.sidebar.slider("Inverse Regularization Strength (C)", 0.01, 10.0, 1.0)

# Each penalty needs a compatible solver; lbfgs covers the plain-l2 default case.
_PENALTY_SOLVERS = {"elasticnet": "saga", "l1": "liblinear"}
solver = _PENALTY_SOLVERS.get(penalty, "lbfgs")

# The mixing ratio is only meaningful for elasticnet; otherwise leave it unset.
l1_ratio = (
    st.sidebar.slider("ElasticNet Mixing Ratio (l1_ratio)", 0.0, 1.0, 0.5)
    if penalty == "elasticnet"
    else None
)
53
 
54
# Prepare data: split features/target and standardize — logistic regression
# regularization is scale-sensitive, so features must share a common scale.
X = df.drop("target", axis=1)
y = df["target"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train model
with st.spinner("Training the model..."):
    model = LogisticRegression(
        penalty=penalty,
        C=C,
        solver=solver,
        # FIX: max_iter=100 is too small for saga/elasticnet on this dataset and
        # raises ConvergenceWarning (the previous revision used 200). A generous
        # budget only lets the optimizer converge; it cannot degrade results.
        max_iter=1000,
        # NOTE(review): multi_class is deprecated in scikit-learn >= 1.5 — kept
        # for backward-compatible "ovr" behavior; confirm installed version.
        multi_class="ovr",
        # l1_ratio is consumed only when penalty == "elasticnet"; None otherwise.
        l1_ratio=l1_ratio,
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
74
+
75
# Show metrics: held-out accuracy plus a per-class precision/recall report
accuracy = accuracy_score(y_test, y_pred)
st.success(f"✅ Model Accuracy: {accuracy * 100:.2f}%")

st.markdown("### 📊 Classification Report")
# target_names maps integer labels back to the cultivar names from the Bunch
st.text(classification_report(y_test, y_pred, target_names=wine.target_names))
81
 
82
# Visualization: decision boundary of a 2-feature projection of the data
st.markdown("## 🎨 Visualizing the Decision Boundary (2 Features Only)")
feature_x = st.selectbox("X-axis Feature", df.columns[:-1], index=0)
feature_y = st.selectbox("Y-axis Feature", df.columns[:-1], index=1)

X_vis = df[[feature_x, feature_y]]
# FIX: use a dedicated scaler — refitting the shared `scaler` (fitted on all
# features above) would silently invalidate any later full-feature transform.
vis_scaler = StandardScaler()
X_vis_scaled = vis_scaler.fit_transform(X_vis)
X_train_v, X_test_v, y_train_v, y_test_v = train_test_split(X_vis_scaled, y, test_size=0.2, random_state=42)

# Visualization model (simplified): fixed hyperparameters for a stable plot
model_vis = LogisticRegression(
    penalty="l2", C=1.0, solver="lbfgs", max_iter=100, multi_class="ovr"
)
model_vis.fit(X_train_v, y_train_v)

# Plot decision boundary: predict the class over a mesh covering the plane
h = 0.05  # mesh step, in standardized-feature units
x_min, x_max = X_vis_scaled[:, 0].min() - 1, X_vis_scaled[:, 0].max() + 1
y_min, y_max = X_vis_scaled[:, 1].min() - 1, X_vis_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = model_vis.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

fig, ax = plt.subplots(figsize=(8, 6))
# FIX: draw through the explicit `ax` rather than the implicit pyplot state
# machine — under Streamlit reruns the "current figure" is unreliable, and the
# figure object is what gets handed to st.pyplot below.
ax.contourf(xx, yy, Z, alpha=0.3)
sns.scatterplot(x=X_vis_scaled[:, 0], y=X_vis_scaled[:, 1], hue=df["target"], palette="Set1", ax=ax)
ax.set_xlabel(feature_x)
ax.set_ylabel(feature_y)
ax.set_title("Decision Boundary")
st.pyplot(fig)
111
 
112
# Summary: closing notes rendered as markdown
st.markdown("""
---
## ✅ Summary
- Logistic Regression is great for interpretable, fast classification.
- `l2` is default; `l1` helps with feature selection.
- `elasticnet` balances both.
🎯 *Tip:* Try adjusting C and penalty type for deeper insights!
""")