UmaKumpatla commited on
Commit
4fbc6ef
·
verified ·
1 Parent(s): 1d1b709

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py CHANGED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.datasets import fetch_openml
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.linear_model import LinearRegression
7
+ from sklearn.preprocessing import StandardScaler
8
+ from sklearn.metrics import mean_squared_error, r2_score
9
+ import matplotlib.pyplot as plt
10
+ import seaborn as sns
11
# --- Page setup -------------------------------------------------------------
# Wide layout gives the dataframe previews and plots more horizontal room.
st.set_page_config(page_title="Explore Linear Regression", layout="wide")
st.title("🏑 Linear Regression with the Boston Housing Dataset")
15
# Intro: brief explainer of linear regression shown at the top of the page.
# NOTE: a raw string is used so the LaTeX backslashes (\[ ... \]) are not
# interpreted as Python escape sequences — "\[" in a normal string is an
# invalid escape and raises a SyntaxWarning on Python 3.12+. With the raw
# string, "\\hat" is written as "\hat"; the rendered markdown is unchanged.
st.markdown(r"""
## πŸ“˜ What is Linear Regression?

Linear Regression models the relationship between a continuous outcome and one or more input variables (features).

**Equation:**
\[
\hat{y} = w_1x_1 + w_2x_2 + ... + w_nx_n + b
\]

It tries to find the line (or hyperplane) that best fits the data.

---
""")
31
# --- Data loading -----------------------------------------------------------
@st.cache_data
def load_data():
    """Fetch the Boston housing data from OpenML as a pandas DataFrame.

    The `@st.cache_data` decorator memoizes the result, so the network
    download happens at most once per session.
    """
    return fetch_openml(name="boston", version=1, as_frame=True).frame


df = load_data()
40
# Dataset preview: show the first rows so users can see the raw feature values.
st.subheader("🏠 Dataset: Boston Housing Prices")
st.markdown("This dataset contains information about houses in Boston suburbs and aims to predict the **median value of owner-occupied homes**.")
st.dataframe(df.head(), use_container_width=True)
44
# --- Feature preparation ----------------------------------------------------
# Split the frame into features X and target y (MEDV = median home value).
target_col = "MEDV"
X = df.drop(columns=target_col)
y = df[target_col]

# NOTE(review): with as_frame=True, fetch_openml delivers the boston columns
# CHAS and RAD as pandas "category" dtype in common sklearn versions, which
# StandardScaler cannot scale directly. Coerce every feature column to a
# numeric dtype first — a no-op for columns that are already numeric, so
# X.columns and the downstream coefficient table are unaffected.
X = X.apply(pd.to_numeric)

# Standardize features (zero mean, unit variance) so the fitted coefficients
# are comparable in magnitude across features.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)
56
# --- Model training ---------------------------------------------------------
# Fit ordinary least squares on the training split, then predict the held-out
# test rows.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# --- Evaluation -------------------------------------------------------------
# MSE penalizes large errors quadratically; R² is the fraction of target
# variance explained by the model (1.0 = perfect fit).
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

st.success(f"πŸ“ Model Performance: RΒ² = {r2:.2f}, MSE = {mse:.2f}")
67
# Coefficient table: because the inputs were standardized, coefficient
# magnitudes serve as a rough measure of feature importance.
st.markdown("### πŸ” Coefficients (Feature Importance)")
coef_df = pd.DataFrame({
    "Feature": X.columns,
    "Coefficient": model.coef_,
})
# Reorder rows by absolute coefficient value, largest first.
by_magnitude = coef_df["Coefficient"].abs().sort_values(ascending=False).index
coef_df = coef_df.loc[by_magnitude]

st.dataframe(coef_df, use_container_width=True)
76
# Actual-vs-predicted scatter: points near the dashed red y = x reference
# line indicate accurate predictions.
st.markdown("### πŸ“ˆ Actual vs Predicted Home Prices")
fig1, ax1 = plt.subplots(figsize=(8, 5))
sns.scatterplot(x=y_test, y=y_pred, ax=ax1, alpha=0.7)
# Dashed reference line spanning the observed target range (perfect-fit line).
ax1.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], '--r')
ax1.set_xlabel("Actual MEDV")
ax1.set_ylabel("Predicted MEDV")
ax1.set_title("Actual vs Predicted Home Values")
st.pyplot(fig1)
86
# Residual distribution: for a well-specified linear model the errors should
# look roughly symmetric and centered at zero.
st.markdown("### πŸ”§ Residual Plot (Errors)")
residuals = y_test - y_pred
fig2, ax2 = plt.subplots(figsize=(8, 5))
sns.histplot(residuals, kde=True, ax=ax2, color="purple")
ax2.set_title("Distribution of Residuals")
ax2.set_xlabel("Error (Actual - Predicted)")
st.pyplot(fig2)
95
# Summary / key takeaways rendered at the bottom of the page.
st.markdown("""
---
## πŸ“š Key Takeaways

- **Linear Regression** is great for understanding relationships and making simple predictions.
- **Coefficients** show how each feature affects the target.
- **Residuals** help assess how well the model fits the data.

### βœ… Use Linear Regression when:
- The outcome is **continuous**
- There’s a **linear trend**
- You need **interpretability** over complexity

🎯 *Pro Tip:* Try removing or combining features and observe how it affects accuracy and residuals!
""")