# Machine_learning / pages / 13_Linear_Regression.py
# (Hugging Face Spaces page artifacts, kept as provenance comments)
# Harika22's picture
# Update pages/13_Linear_Regression.py
# 28b2ad4 verified
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Page chrome: wide layout, chart icon in the browser tab.
st.set_page_config(page_title="Linear Regression", page_icon="📊", layout="wide")
st.title("📈 Linear Regression Explorer")

# A horizontal radio acts as a lightweight tab bar for the page's sections;
# the selected label drives the if/elif dispatcher below.
SECTION_NAMES = [
    "Introduction",
    "Best Fit Line",
    "Simple vs Multiple",
    "Gradient Descent",
    "Assumptions",
    "Evaluation Metrics",
    "Interactive Example",
    "Colab Notebook",
]
section = st.radio("Navigate the Theory and Visuals", SECTION_NAMES, horizontal=True)
# Render the section chosen in the radio above. Fixes applied:
#  - restored the indentation lost in a copy/paste (branch bodies were at column 0);
#  - raw strings (r"""...""") on markdown containing LaTeX-style backslashes: in a
#    plain string "\a" is the BEL escape, which corrupted the Gradient Descent text;
#  - the regression line is now drawn over x sorted ascending — plotting y_pred
#    against an unsorted x column renders a zig-zag instead of a straight line.
if section == "Introduction":
    st.header("📘 What is Linear Regression?")
    st.markdown("""
Linear Regression is a **Supervised Learning** algorithm used for predicting **continuous outcomes**.
The idea is to fit a line that best captures the relationship between input variables and the output variable.
""")
elif section == "Best Fit Line":
    st.header("📏 Best Fit Line")
    st.latex(r"\hat{y} = w_1 x + w_0")
    st.markdown(r"""
- \( w_1 \): Slope (how much \( y \) changes with \( x \))
- \( w_0 \): Intercept
- Found using **Ordinary Least Squares** or **Gradient Descent**
""")
elif section == "Simple vs Multiple":
    st.header("🔧 Simple vs Multiple Linear Regression")
    st.subheader("Simple Linear Regression")
    st.latex(r"\hat{y} = w_1 x + w_0")
    st.subheader("Multiple Linear Regression")
    st.latex(r"\hat{y} = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + w_0")
elif section == "Gradient Descent":
    st.header("⚙️ Gradient Descent")
    st.latex(r"w := w - \alpha \cdot \frac{\partial \text{Loss}}{\partial w}")
    # Raw string is required here: "\alpha" in a plain string starts with the
    # BEL escape "\a" and would render as a control character + "lpha".
    st.markdown(r"""
- \( \alpha \): Learning Rate
- Goal: Minimize **Mean Squared Error**
""")
elif section == "Assumptions":
    st.header("📏 Assumptions of Linear Regression")
    st.markdown("""
1. Linearity
2. No Multicollinearity
3. Homoscedasticity
4. Normality of residuals
5. No autocorrelation
""")
elif section == "Evaluation Metrics":
    st.header("📊 Evaluation Metrics")
    st.latex(r"MSE = \frac{1}{n} \sum (\hat{y}_i - y_i)^2")
    st.latex(r"MAE = \frac{1}{n} \sum |\hat{y}_i - y_i|")
    st.latex(r"R^2 = 1 - \frac{\text{SS}_{res}}{\text{SS}_{tot}}")
elif section == "Interactive Example":
    st.header("🎯 Try Linear Regression on Real Data")
    df = px.data.tips()  # Load sample dataset bundled with plotly
    st.write("Dataset preview:", df.head())
    # Offer only numeric columns; regression needs numeric X and y.
    numeric_cols = df.select_dtypes(include=np.number).columns
    x_feature = st.selectbox("Select Independent Variable (X)", numeric_cols)
    y_feature = st.selectbox("Select Dependent Variable (Y)", numeric_cols, index=1)
    X = df[[x_feature]]  # 2-D frame: sklearn expects shape (n_samples, n_features)
    y = df[y_feature]
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)
    fig = px.scatter(df, x=x_feature, y=y_feature, title="Scatter Plot with Regression Line")
    # Sort by x before tracing the fitted line; an unsorted x column would make
    # the 'lines' trace jump back and forth instead of drawing a straight line.
    order = np.argsort(df[x_feature].to_numpy())
    fig.add_scatter(
        x=df[x_feature].to_numpy()[order],
        y=y_pred[order],
        mode='lines',
        name='Best Fit Line',
    )
    st.plotly_chart(fig, use_container_width=True)
    st.subheader("Model Performance")
    st.write(f"**Slope (w₁)**: {model.coef_[0]:.4f}")
    st.write(f"**Intercept (w₀)**: {model.intercept_:.4f}")
    st.write(f"**R² Score**: {r2_score(y, y_pred):.4f}")
    st.write(f"**MSE**: {mean_squared_error(y, y_pred):.4f}")
elif section == "Colab Notebook":
    st.header("📓 Open in Google Colab")
    st.markdown("""
<a href='https://colab.research.google.com/drive/11-Rv7BC2PhOqk5hnpdXo6QjqLLYLDvTD?usp=sharing' target='_blank'>
🔗 Open Linear Regression Colab Notebook
</a>
""", unsafe_allow_html=True)
# Footer rendered below every section.
st.markdown("---")
st.success("This app blends theory with visuals and interaction to help you master Linear Regression!")