# Machine_learning / pages / 13_Linear_Regression.py
# (Hugging Face Spaces page artifacts, kept as provenance comments)
# Harika22's picture
# Update pages/13_Linear_Regression.py
# 28b2ad4 verified
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Page chrome: wide layout, chart icon in the browser tab.
st.set_page_config(page_title="Linear Regression", page_icon="📊", layout="wide")
st.title("📈 Linear Regression Explorer")

# A horizontal radio acts as a lightweight tab bar for the page's sections;
# the selected label drives the if/elif dispatcher below.
SECTION_NAMES = [
    "Introduction",
    "Best Fit Line",
    "Simple vs Multiple",
    "Gradient Descent",
    "Assumptions",
    "Evaluation Metrics",
    "Interactive Example",
    "Colab Notebook",
]
section = st.radio("Navigate the Theory and Visuals", SECTION_NAMES, horizontal=True)
# Render the section chosen in the radio above. Fixes applied:
#  - restored the indentation lost in a copy/paste (branch bodies were at column 0);
#  - raw strings (r"""...""") on markdown containing LaTeX-style backslashes: in a
#    plain string "\a" is the BEL escape, which corrupted the Gradient Descent text;
#  - the regression line is now drawn over x sorted ascending — plotting y_pred
#    against an unsorted x column renders a zig-zag instead of a straight line.
if section == "Introduction":
    st.header("📘 What is Linear Regression?")
    st.markdown("""
Linear Regression is a **Supervised Learning** algorithm used for predicting **continuous outcomes**.
The idea is to fit a line that best captures the relationship between input variables and the output variable.
""")
elif section == "Best Fit Line":
    st.header("📏 Best Fit Line")
    st.latex(r"\hat{y} = w_1 x + w_0")
    st.markdown(r"""
- \( w_1 \): Slope (how much \( y \) changes with \( x \))
- \( w_0 \): Intercept
- Found using **Ordinary Least Squares** or **Gradient Descent**
""")
elif section == "Simple vs Multiple":
    st.header("🔧 Simple vs Multiple Linear Regression")
    st.subheader("Simple Linear Regression")
    st.latex(r"\hat{y} = w_1 x + w_0")
    st.subheader("Multiple Linear Regression")
    st.latex(r"\hat{y} = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + w_0")
elif section == "Gradient Descent":
    st.header("⚙️ Gradient Descent")
    st.latex(r"w := w - \alpha \cdot \frac{\partial \text{Loss}}{\partial w}")
    # Raw string is required here: "\alpha" in a plain string starts with the
    # BEL escape "\a" and would render as a control character + "lpha".
    st.markdown(r"""
- \( \alpha \): Learning Rate
- Goal: Minimize **Mean Squared Error**
""")
elif section == "Assumptions":
    st.header("📏 Assumptions of Linear Regression")
    st.markdown("""
1. Linearity
2. No Multicollinearity
3. Homoscedasticity
4. Normality of residuals
5. No autocorrelation
""")
elif section == "Evaluation Metrics":
    st.header("📊 Evaluation Metrics")
    st.latex(r"MSE = \frac{1}{n} \sum (\hat{y}_i - y_i)^2")
    st.latex(r"MAE = \frac{1}{n} \sum |\hat{y}_i - y_i|")
    st.latex(r"R^2 = 1 - \frac{\text{SS}_{res}}{\text{SS}_{tot}}")
elif section == "Interactive Example":
    st.header("🎯 Try Linear Regression on Real Data")
    df = px.data.tips()  # Load sample dataset bundled with plotly
    st.write("Dataset preview:", df.head())
    # Offer only numeric columns; regression needs numeric X and y.
    numeric_cols = df.select_dtypes(include=np.number).columns
    x_feature = st.selectbox("Select Independent Variable (X)", numeric_cols)
    y_feature = st.selectbox("Select Dependent Variable (Y)", numeric_cols, index=1)
    X = df[[x_feature]]  # 2-D frame: sklearn expects shape (n_samples, n_features)
    y = df[y_feature]
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)
    fig = px.scatter(df, x=x_feature, y=y_feature, title="Scatter Plot with Regression Line")
    # Sort by x before tracing the fitted line; an unsorted x column would make
    # the 'lines' trace jump back and forth instead of drawing a straight line.
    order = np.argsort(df[x_feature].to_numpy())
    fig.add_scatter(
        x=df[x_feature].to_numpy()[order],
        y=y_pred[order],
        mode='lines',
        name='Best Fit Line',
    )
    st.plotly_chart(fig, use_container_width=True)
    st.subheader("Model Performance")
    st.write(f"**Slope (w₁)**: {model.coef_[0]:.4f}")
    st.write(f"**Intercept (w₀)**: {model.intercept_:.4f}")
    st.write(f"**R² Score**: {r2_score(y, y_pred):.4f}")
    st.write(f"**MSE**: {mean_squared_error(y, y_pred):.4f}")
elif section == "Colab Notebook":
    st.header("📓 Open in Google Colab")
    st.markdown("""
<a href='https://colab.research.google.com/drive/11-Rv7BC2PhOqk5hnpdXo6QjqLLYLDvTD?usp=sharing' target='_blank'>
🔗 Open Linear Regression Colab Notebook
</a>
""", unsafe_allow_html=True)
# Footer rendered below every section.
st.markdown("---")
st.success("This app blends theory with visuals and interaction to help you master Linear Regression!")