sree4411 committed on
Commit
7cc2e69
·
verified ·
1 Parent(s): 96c53e1

Update pages/Linear Regression.py

Browse files
Files changed (1) hide show
  1. pages/Linear Regression.py +132 -0
pages/Linear Regression.py CHANGED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.model_selection import train_test_split
5
+ from sklearn.linear_model import LinearRegression
6
+ from sklearn.metrics import mean_squared_error, r2_score
7
+ import matplotlib.pyplot as plt
8
+ import seaborn as sns
9
+
10
# Configure the browser-tab title and wide layout, then render the page heading.
# The heading emoji is restored from its mis-decoded (mojibake) form.
st.set_page_config(page_title="Explore Linear Regression", layout="wide")
st.title("📈 Linear Regression Explained")
12
+
13
+ # Tabs
14
+
15
# Sidebar: let the user upload a CSV; without one the app falls back to the
# California Housing data bundled with scikit-learn.
# NOTE(review): the source's indentation was lost; the warning is assumed to
# belong inside the sidebar block -- confirm against the real file.
with st.sidebar:
    st.header("📊 Data Options")
    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])

    if uploaded_file is None:
        st.warning("Using default dataset (Boston Housing dataset replacement). Upload your own for custom results.")

# Build the working DataFrame `df` used by the tabs below.
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Show a readable message instead of a raw traceback, then halt this
        # script run: nothing below can work without a DataFrame.
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()
else:
    # Imported here so the dataset loader is only pulled in when it is used.
    from sklearn.datasets import fetch_california_housing

    data = fetch_california_housing()
    df = pd.DataFrame(data.data, columns=data.feature_names)
    df['target'] = data.target
29
+
30
# Explanatory text for the "About" tab.
# This MUST be a raw string: in a normal literal the LaTeX commands \beta,
# \frac and \varepsilon start with "\b", "\f" and "\v", which Python turns
# into backspace / form-feed / vertical-tab characters and breaks the formulas.
# The content is kept at column 0 so the Markdown never reads as a code block.
ABOUT_MARKDOWN = r"""
## 📈 What is Linear Regression?

**Linear Regression** is a fundamental algorithm in machine learning used to predict continuous numerical values.

---
### 🔢 The Linear Equation:

The general form:
$$
y = \beta_0 + \beta_1x_1 + \beta_2x_2 + ... + \beta_nx_n + \varepsilon
$$

- **y**: Output (target)
- **x₁, x₂, ..., xₙ**: Input features
- **β₀**: Intercept
- **β₁, ..., βₙ**: Coefficients
- **ε**: Error term

---
### 🧠 How it Works:

1. Fit a straight line that minimizes the squared error between predicted and actual values.
2. Uses Ordinary Least Squares (OLS) for best-fit line.

---
### 🧮 Loss Function: Mean Squared Error (MSE)

$$
MSE = \frac{1}{n} \sum_{i=1}^{n}(y_i - \hat{y}_i)^2
$$

---
### 📦 Use Cases:

- Predicting housing prices
- Estimating salaries
- Forecasting trends

---
### ✅ Pros:
- Simple and fast
- Interpretable
- Good baseline for regression tasks

### ⚠️ Cons:
- Assumes linear relationship
- Sensitive to outliers
- Doesn't handle multicollinearity well

---
### 📌 Assumptions:
- Linearity
- Homoscedasticity
- Independence
- Normality of residuals

"""

# Tabs: the three containers are filled in order by the `with` blocks that follow.
tab1, tab2, tab3 = st.tabs(["📖 About Linear Regression", "⚙️ Train Model", "📈 Visualize"])

with tab1:
    st.markdown(ABOUT_MARKDOWN)
93
+
94
# "Train Model" tab: pick a target and features, fit an ordinary least-squares
# model on an 80/20 split, and report test-set metrics and coefficients.
# Leaves feature_cols, target_col, y_test and y_pred in scope for later code.
with tab2:
    st.subheader("⚙️ Train Linear Regression Model")

    # Only numeric columns are offered: fitting on text columns raises.
    numeric_cols = df.select_dtypes(include="number").columns.tolist()
    if not numeric_cols:
        st.warning("No numeric columns found in the dataset.")

    target_col = st.selectbox("Select Target Variable", numeric_cols)
    feature_cols = st.multiselect(
        "Select Feature Columns",
        [col for col in numeric_cols if col != target_col],
    )

    if feature_cols and target_col:
        # Rows with missing values in the chosen columns are dropped, because
        # the estimator rejects NaN.
        model_data = df[feature_cols + [target_col]].dropna()
        if len(model_data) < 2:
            # A train/test split needs at least one row on each side.
            # st.stop() ends this script run, so nothing after it is rendered.
            st.error("Not enough complete rows in the selected columns to train a model.")
            st.stop()

        X = model_data[feature_cols]
        y = model_data[target_col]

        # Fixed random_state keeps the split (and the metrics) reproducible.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        st.success("Model Trained Successfully! ✅")
        st.metric("R² Score", f"{r2_score(y_test, y_pred):.4f}")
        st.metric("MSE", f"{mean_squared_error(y_test, y_pred):.4f}")

        st.markdown("### Coefficients")
        coef_df = pd.DataFrame({"Feature": feature_cols, "Coefficient": model.coef_})
        st.dataframe(coef_df)
117
+
118
+ with tab3:
119
+ st.subheader("๐Ÿ“ˆ Actual vs Predicted Plot")
120
+
121
+ if feature_cols and target_col:
122
+ fig, ax = plt.subplots()
123
+ sns.scatterplot(x=y_test, y=y_pred, ax=ax)
124
+ ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
125
+ ax.set_xlabel("Actual")
126
+ ax.set_ylabel("Predicted")
127
+ ax.set_title("Actual vs Predicted")
128
+ st.pyplot(fig)
129
+
130
+ st.markdown("---")
131
+ st.markdown("### ๐Ÿ’ก Tip:")
132
+ st.info("If predictions look scattered from the red line, try using non-linear models or transform your features.")