# app.py — Streamlit linear-regression demo (upstream commit 988d6d6, verified)
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
# Page setup: wide layout, browser-tab title.
st.set_page_config(page_title="Explore Linear Regression", layout="wide")
st.title("Linear Regression")

# Intro.
# NOTE: st.markdown renders math via KaTeX only inside $ / $$ delimiters;
# the previous \[ ... \] block was shown as raw text. A raw string is used
# so the \hat macro needs no double-escaping.
st.markdown(r"""
## πŸ“˜ What is Linear Regression?
Linear Regression models the relationship between a continuous outcome and one or more input variables (features).
**Equation:**
$$
\hat{y} = w_1x_1 + w_2x_2 + ... + w_nx_n + b
$$
It tries to find the line (or hyperplane) that best fits the data.
---
""")
# Load dataset from OpenML (Boston housing)
@st.cache_data
def load_data():
    """Fetch the Boston housing dataset from OpenML as a pandas DataFrame.

    Returns:
        DataFrame with 13 feature columns plus the ``MEDV`` target.

    Notes:
        fetch_openml(as_frame=True) delivers ``CHAS`` and ``RAD`` as pandas
        ``category`` columns, which would make StandardScaler.fit_transform
        fail downstream — so all categorical columns are coerced to numeric
        here. Cached by Streamlit to avoid re-downloading on every rerun.
    """
    boston = fetch_openml(name="boston", version=1, as_frame=True)
    df = boston.frame
    # Coerce categorical columns (CHAS, RAD) to plain numeric dtype so the
    # whole frame is scaler-friendly.
    for col in df.select_dtypes(include="category").columns:
        df[col] = pd.to_numeric(df[col])
    return df
# Load (cached) data and show a quick preview of the raw table.
df = load_data()
st.subheader("🏠 Dataset: Boston Housing Prices")
st.markdown("This dataset contains information about houses in Boston suburbs and aims to predict the **median value of owner-occupied homes**.")
preview = df.head()
st.dataframe(preview, use_container_width=True)
# ---- Feature/target split -------------------------------------------------
target_col = "MEDV"
y = df[target_col]
X = df.drop(columns=target_col)

# ---- Standardize features (zero mean, unit variance) ----------------------
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ---- Hold out 20% of the rows for evaluation (fixed seed for stability) ---
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# ---- Fit ordinary least squares and predict on the held-out set -----------
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# ---- Report fit quality ---------------------------------------------------
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
st.success(f"πŸ“ Model Performance: RΒ² = {r2:.2f}, MSE = {mse:.2f}")
# Show learned weights, largest magnitude first (sign indicates direction
# of the feature's effect on MEDV in standardized units).
st.markdown("### πŸ” Coefficients (Feature Importance)")
coef_df = (
    pd.DataFrame({"Feature": X.columns, "Coefficient": model.coef_})
    .sort_values(by="Coefficient", key=abs, ascending=False)
)
st.dataframe(coef_df, use_container_width=True)
# Scatter of held-out predictions against ground truth; points on the
# dashed identity line are perfect predictions.
st.markdown("### πŸ“ˆ Actual vs Predicted Home Prices")
fig1, ax1 = plt.subplots(figsize=(8, 5))
sns.scatterplot(x=y_test, y=y_pred, alpha=0.7, ax=ax1)
lo, hi = y_test.min(), y_test.max()
ax1.plot([lo, hi], [lo, hi], '--r')  # identity (y = x) reference line
ax1.set(
    xlabel="Actual MEDV",
    ylabel="Predicted MEDV",
    title="Actual vs Predicted Home Values",
)
st.pyplot(fig1)
# Histogram (with KDE) of prediction errors; a roughly symmetric,
# zero-centered shape suggests the linear fit is unbiased.
st.markdown("### πŸ”§ Residual Plot (Errors)")
residuals = y_test - y_pred
fig2, ax2 = plt.subplots(figsize=(8, 5))
sns.histplot(residuals, kde=True, color="purple", ax=ax2)
ax2.set(
    title="Distribution of Residuals",
    xlabel="Error (Actual - Predicted)",
)
st.pyplot(fig2)
# Closing notes rendered as one markdown section.
summary_md = """
---
## πŸ“š Key Takeaways
- **Linear Regression** is great for understanding relationships and making simple predictions.
- **Coefficients** show how each feature affects the target.
- **Residuals** help assess how well the model fits the data.
### βœ… Use Linear Regression when:
- The outcome is **continuous**
- There’s a **linear trend**
- You need **interpretability** over complexity
🎯 *Pro Tip:* Try removing or combining features and observe how it affects accuracy and residuals!
"""
st.markdown(summary_md)