# Streamlit app — Linear Regression Explorer (originally hosted as a Hugging Face Space).
"""Interactive Streamlit app that teaches Linear Regression.

Combines short theory sections (best-fit line, simple vs. multiple
regression, gradient descent, assumptions, evaluation metrics) with a
live demo fitted on the Plotly Express ``tips`` dataset, plus a link to
a companion Colab notebook.
"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

st.set_page_config(page_title="Linear Regression", page_icon="📈", layout="wide")
st.title("📈 Linear Regression Explorer")

# Horizontal radio acts as the app's section navigation.
section = st.radio(
    "Navigate the Theory and Visuals",
    [
        "Introduction",
        "Best Fit Line",
        "Simple vs Multiple",
        "Gradient Descent",
        "Assumptions",
        "Evaluation Metrics",
        "Interactive Example",
        "Colab Notebook",
    ],
    horizontal=True,
)

if section == "Introduction":
    st.header("What is Linear Regression?")
    st.markdown("""
    Linear Regression is a **Supervised Learning** algorithm used for predicting **continuous outcomes**.
    The idea is to fit a line that best captures the relationship between input variables and the output variable.
    """)

elif section == "Best Fit Line":
    st.header("Best Fit Line")
    st.latex(r"\hat{y} = w_1 x + w_0")
    # NOTE: st.markdown renders inline math with $...$ delimiters;
    # the \( ... \) form would appear as literal text.
    st.markdown(r"""
    - $w_1$: Slope (how much $y$ changes with $x$)
    - $w_0$: Intercept
    - Found using **Ordinary Least Squares** or **Gradient Descent**
    """)

elif section == "Simple vs Multiple":
    st.header("Simple vs Multiple Linear Regression")
    st.subheader("Simple Linear Regression")
    st.latex(r"\hat{y} = w_1 x + w_0")
    st.subheader("Multiple Linear Regression")
    st.latex(r"\hat{y} = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + w_0")

elif section == "Gradient Descent":
    st.header("Gradient Descent")
    st.latex(r"w := w - \alpha \cdot \frac{\partial \text{Loss}}{\partial w}")
    st.markdown(r"""
    - $\alpha$: Learning Rate
    - Goal: Minimize **Mean Squared Error**
    """)

elif section == "Assumptions":
    st.header("Assumptions of Linear Regression")
    st.markdown("""
    1. Linearity
    2. No Multicollinearity
    3. Homoscedasticity
    4. Normality of residuals
    5. No autocorrelation
    """)

elif section == "Evaluation Metrics":
    st.header("Evaluation Metrics")
    st.latex(r"MSE = \frac{1}{n} \sum (\hat{y}_i - y_i)^2")
    st.latex(r"MAE = \frac{1}{n} \sum |\hat{y}_i - y_i|")
    st.latex(r"R^2 = 1 - \frac{\text{SS}_{res}}{\text{SS}_{tot}}")

elif section == "Interactive Example":
    st.header("Try Linear Regression on Real Data")
    df = px.data.tips()  # small sample dataset bundled with Plotly
    st.write("Dataset preview:", df.head())

    numeric_cols = df.select_dtypes(include=np.number).columns
    x_feature = st.selectbox("Select Independent Variable (X)", numeric_cols)
    y_feature = st.selectbox("Select Dependent Variable (Y)", numeric_cols, index=1)
    if x_feature == y_feature:
        st.warning("X and Y are the same column — pick different features for a meaningful fit.")

    # Fit a simple one-feature OLS model on the full dataset (no split:
    # this is an illustration, not a generalization benchmark).
    X = df[[x_feature]]
    y = df[y_feature]
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)

    fig = px.scatter(df, x=x_feature, y=y_feature, title="Scatter Plot with Regression Line")
    # Sort by x before drawing the line trace, otherwise Plotly connects
    # the points in row order and the "line" zig-zags across the chart.
    x_values = df[x_feature].to_numpy()
    order = np.argsort(x_values)
    fig.add_scatter(
        x=x_values[order],
        y=np.asarray(y_pred)[order],
        mode="lines",
        name="Best Fit Line",
    )
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("Model Performance")
    st.write(f"**Slope (w₁)**: {model.coef_[0]:.4f}")
    st.write(f"**Intercept (w₀)**: {model.intercept_:.4f}")
    st.write(f"**R² Score**: {r2_score(y, y_pred):.4f}")
    st.write(f"**MSE**: {mean_squared_error(y, y_pred):.4f}")

elif section == "Colab Notebook":
    st.header("Open in Google Colab")
    st.markdown(
        """
        <a href='https://colab.research.google.com/drive/11-Rv7BC2PhOqk5hnpdXo6QjqLLYLDvTD?usp=sharing' target='_blank'>
        Open Linear Regression Colab Notebook
        </a>
        """,
        unsafe_allow_html=True,
    )

# Footer shown under every section.
st.markdown("---")
st.success("This app blends theory with visuals and interaction to help you master Linear Regression!")