Harika22 commited on
Commit
28b2ad4
·
verified ·
1 Parent(s): 638b2d1

Update pages/13_Linear_Regression.py

Browse files
Files changed (1) hide show
  1. pages/13_Linear_Regression.py +74 -115
pages/13_Linear_Regression.py CHANGED
@@ -1,141 +1,100 @@
1
  import streamlit as st
 
 
 
 
 
2
 
3
  st.set_page_config(page_title="Linear Regression", page_icon="πŸ“Š", layout="wide")
4
 
5
- st.markdown("<h1 style='text-align: center;'>πŸ“ˆ Linear Regression: A Visual and Theoretical Guide</h1>", unsafe_allow_html=True)
6
-
7
- section = st.sidebar.radio(
8
- "πŸ” Explore Topics",
9
- [
10
- "πŸ“˜ What is Linear Regression?",
11
- "πŸ“ Best Fit Line",
12
- "πŸ”§ Training (Simple Linear Regression)",
13
- "πŸ” Testing Phase",
14
- "πŸ“Š Multiple Linear Regression",
15
- "βš™οΈ Gradient Descent",
16
- "πŸ“ Assumptions",
17
- "πŸ“Š Evaluation Metrics",
18
- "πŸ““ Colab Notebook",
19
- ]
20
- )
21
 
22
- if section == "πŸ“˜ What is Linear Regression?":
23
- st.subheader("πŸ“˜ What is Linear Regression?")
24
- st.write("""
25
- Linear Regression is a **Supervised Learning Algorithm** used to predict **continuous values**.
26
- - It models the relationship between the **dependent variable (target)** and one or more **independent variables (features)**.
27
- - The goal is to fit the **best straight line** that minimizes the error.
28
- """)
29
 
30
- elif section == "πŸ“ Best Fit Line":
31
- st.subheader("πŸ“ What is the Best Fit Line?")
32
- st.write("""
33
- A **best fit line**:
34
- - Minimizes the **Mean Squared Error (MSE)**
35
- - Can be found using **Ordinary Least Squares (OLS)** or **Gradient Descent**
36
-
37
- #### Simple Linear Equation:
38
- $$
39
- \hat{y} = w_1 x + w_0
40
- $$
41
- - \( w_1 \): slope (coefficient)
42
- - \( w_0 \): intercept (bias)
43
  """)
44
 
45
- elif section == "πŸ”§ Training (Simple Linear Regression)":
46
- st.subheader("πŸ”§ Training: Simple Linear Regression")
47
- st.write("""
48
- Used when there’s only **one feature**.
49
-
50
- **Steps to Train:**
51
- 1. Initialize weights: \( w_1, w_0 \)
52
- 2. Predict: \( \hat{y} = w_1 x + w_0 \)
53
- 3. Calculate **Mean Squared Error (MSE)**:
54
- $$
55
- \text{MSE} = \frac{1}{n} \sum (\hat{y}_i - y_i)^2
56
- $$
57
- 4. Optimize weights using **Gradient Descent**
58
  """)
59
 
60
- elif section == "πŸ” Testing Phase":
61
- st.subheader("πŸ” Prediction (Testing Phase)")
62
- st.write("""
63
- Once trained, the model can predict new outcomes:
 
 
64
 
65
- **Given new input \( x \):**
66
- $$
67
- \hat{y} = w_1 x + w_0
68
- $$
 
 
 
69
 
70
- - Compare predicted \( \hat{y} \) with actual \( y \) (if known)
 
 
 
 
 
 
 
71
  """)
72
 
73
- elif section == "πŸ“Š Multiple Linear Regression":
74
- st.subheader("πŸ“Š Multiple Linear Regression")
75
- st.write("""
76
- Predicts using **multiple features**.
 
77
 
78
- #### Equation:
79
- $$
80
- \hat{y} = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + w_0
81
- $$
 
82
 
83
- - Each input feature has its own weight
84
- - Use same process: predict β†’ calculate loss β†’ optimize
85
- """)
86
 
87
- elif section == "βš™οΈ Gradient Descent":
88
- st.subheader("βš™οΈ Gradient Descent Optimization")
89
- st.write("""
90
- **Goal:** Minimize the loss function (like MSE)
91
-
92
- #### Update Rule:
93
- $$
94
- w := w - \alpha \cdot \frac{\partial \text{MSE}}{\partial w}
95
- $$
96
-
97
- - \( \alpha \): learning rate
98
- - Choose carefully:
99
- - Too high β†’ overshoot
100
- - Too low β†’ slow convergence
101
- - Common choices: 0.01, 0.1
102
- """)
103
 
104
- elif section == "πŸ“ Assumptions":
105
- st.subheader("πŸ“ Assumptions of Linear Regression")
106
- st.write("""
107
- 1. **Linearity**: Relationship between variables is linear
108
- 2. **No Multicollinearity**: Features shouldn't be highly correlated
109
- 3. **Homoscedasticity**: Constant variance of residuals
110
- 4. **Normality of Errors**: Errors are normally distributed
111
- 5. **No Autocorrelation**: Errors should not be related across observations
112
- """)
113
 
114
- elif section == "πŸ“Š Evaluation Metrics":
115
- st.subheader("πŸ“Š Evaluation Metrics for Linear Regression")
116
- st.write("""
117
- - **Mean Squared Error (MSE)**:
118
- $$
119
- \text{MSE} = \frac{1}{n} \sum (\hat{y}_i - y_i)^2
120
- $$
121
- - **Mean Absolute Error (MAE)**:
122
- $$
123
- \text{MAE} = \frac{1}{n} \sum |\hat{y}_i - y_i|
124
- $$
125
- - **R-squared ( \( R^2 \) )**:
126
- $$
127
- R^2 = 1 - \frac{SS_{res}}{SS_{tot}}
128
- $$
129
- Measures how well the model explains the variance in data.
130
- """)
131
 
132
- elif section == "πŸ““ Colab Notebook":
133
- st.subheader("πŸ““ Hands-On Implementation in Google Colab")
134
  st.markdown("""
135
  <a href='https://colab.research.google.com/drive/11-Rv7BC2PhOqk5hnpdXo6QjqLLYLDvTD?usp=sharing' target='_blank'>
136
- πŸ”— Click here to open the Linear Regression Notebook in Colab
137
  </a>
138
  """, unsafe_allow_html=True)
139
 
140
  st.markdown("---")
141
- st.success("Mastering Linear Regression is essential β€” it's the foundation for many advanced models in machine learning!")
 
1
"""Streamlit page: interactive Linear Regression tutorial.

Short theory sections (rendered with ``st.latex`` / markdown) plus an
interactive demo that fits scikit-learn's ``LinearRegression`` on the
plotly ``tips`` sample dataset and reports slope, intercept, R² and MSE.
"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

st.set_page_config(page_title="Linear Regression", page_icon="📊", layout="wide")

st.title("📈 Linear Regression Explorer")

section = st.radio(
    "Navigate the Theory and Visuals",
    [
        "Introduction",
        "Best Fit Line",
        "Simple vs Multiple",
        "Gradient Descent",
        "Assumptions",
        "Evaluation Metrics",
        "Interactive Example",
        "Colab Notebook",
    ],
    horizontal=True,
)

if section == "Introduction":
    st.header("📘 What is Linear Regression?")
    st.markdown("""
    Linear Regression is a **Supervised Learning** algorithm used for predicting **continuous outcomes**.
    The idea is to fit a line that best captures the relationship between input variables and the output variable.
    """)

elif section == "Best Fit Line":
    st.header("📏 Best Fit Line")
    st.latex(r"\hat{y} = w_1 x + w_0")
    # Raw string + $...$ delimiters: st.markdown renders \( ... \) literally,
    # and in a non-raw string sequences like "\a" are interpreted as escapes.
    st.markdown(r"""
    - $w_1$: Slope (how much $y$ changes with $x$)
    - $w_0$: Intercept
    - Found using **Ordinary Least Squares** or **Gradient Descent**
    """)

elif section == "Simple vs Multiple":
    st.header("🔧 Simple vs Multiple Linear Regression")
    st.subheader("Simple Linear Regression")
    st.latex(r"\hat{y} = w_1 x + w_0")
    st.subheader("Multiple Linear Regression")
    st.latex(r"\hat{y} = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + w_0")

elif section == "Gradient Descent":
    st.header("⚙️ Gradient Descent")
    st.latex(r"w := w - \alpha \cdot \frac{\partial \text{Loss}}{\partial w}")
    # Raw string is required: "\alpha" in a plain string starts with the
    # BEL escape "\a" and the rendered text would be corrupted.
    st.markdown(r"""
    - $\alpha$: Learning Rate
    - Goal: Minimize **Mean Squared Error**
    """)

elif section == "Assumptions":
    st.header("📐 Assumptions of Linear Regression")
    st.markdown("""
    1. Linearity
    2. No Multicollinearity
    3. Homoscedasticity
    4. Normality of residuals
    5. No autocorrelation
    """)

elif section == "Evaluation Metrics":
    st.header("📊 Evaluation Metrics")
    st.latex(r"MSE = \frac{1}{n} \sum (\hat{y}_i - y_i)^2")
    st.latex(r"MAE = \frac{1}{n} \sum |\hat{y}_i - y_i|")
    st.latex(r"R^2 = 1 - \frac{\text{SS}_{res}}{\text{SS}_{tot}}")

elif section == "Interactive Example":
    st.header("🎯 Try Linear Regression on Real Data")

    df = px.data.tips()  # small built-in sample dataset (244 restaurant bills)
    st.write("Dataset preview:", df.head())

    numeric_cols = df.select_dtypes(include=np.number).columns
    x_feature = st.selectbox("Select Independent Variable (X)", numeric_cols)
    y_feature = st.selectbox("Select Dependent Variable (Y)", numeric_cols, index=1)

    if x_feature == y_feature:
        # Regressing a column on itself trivially gives R^2 == 1 — not informative.
        st.warning("X and Y are the same column — please pick two different variables.")
    else:
        X = df[[x_feature]]  # 2-D (n, 1) frame, as sklearn expects
        y = df[y_feature]

        model = LinearRegression()
        model.fit(X, y)
        y_pred = model.predict(X)

        # Sort by X so the fitted line is drawn once, left-to-right,
        # instead of retracing itself in dataset row order.
        x_vals = X[x_feature].to_numpy()
        order = np.argsort(x_vals)

        fig = px.scatter(df, x=x_feature, y=y_feature, title="Scatter Plot with Regression Line")
        fig.add_scatter(x=x_vals[order], y=y_pred[order], mode='lines', name='Best Fit Line')
        st.plotly_chart(fig, use_container_width=True)

        st.subheader("Model Performance")
        st.write(f"**Slope (w₁)**: {model.coef_[0]:.4f}")
        st.write(f"**Intercept (w₀)**: {model.intercept_:.4f}")
        st.write(f"**R² Score**: {r2_score(y, y_pred):.4f}")
        st.write(f"**MSE**: {mean_squared_error(y, y_pred):.4f}")

elif section == "Colab Notebook":
    st.header("📓 Open in Google Colab")
    st.markdown("""
    <a href='https://colab.research.google.com/drive/11-Rv7BC2PhOqk5hnpdXo6QjqLLYLDvTD?usp=sharing' target='_blank'>
    🔗 Open Linear Regression Colab Notebook
    </a>
    """, unsafe_allow_html=True)

st.markdown("---")
st.success("This app blends theory with visuals and interaction to help you master Linear Regression!")