Spaces:

sree4411
/

ML_ALGORITHMS

Sleeping

App Files Files Community

sree4411 committed on Apr 8, 2025

Commit

7cc2e69

verified ·

1 Parent(s): 96c53e1

Update pages/Linear Regression.py

Browse files

Files changed (1) hide show

pages/Linear Regression.py +132 -0

pages/Linear Regression.py CHANGED Viewed

	@@ -0,0 +1,132 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error, r2_score
+import matplotlib.pyplot as plt
+import seaborn as sns
+st.set_page_config(page_title="Explore Linear Regression", layout="wide")
+st.title("📈 Linear Regression Explained")
+# Tabs
+with st.sidebar:
+    st.header("📊 Data Options")
+    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
+    if uploaded_file is None:
+        st.warning("Using default dataset (Boston Housing dataset replacement). Upload your own for custom results.")
+if uploaded_file:
+    df = pd.read_csv(uploaded_file)
+else:
+    from sklearn.datasets import fetch_california_housing
+    data = fetch_california_housing()
+    df = pd.DataFrame(data.data, columns=data.feature_names)
+    df['target'] = data.target
+# Tabs
+tab1, tab2, tab3 = st.tabs(["📖 About Linear Regression", "⚙️ Train Model", "📈 Visualize"])
+with tab1:
+    st.markdown("""
+    ## 📈 What is Linear Regression?
+    **Linear Regression** is a fundamental algorithm in machine learning used to predict continuous numerical values.
+    ---
+    ### 🔢 The Linear Equation:
+    The general form:
+    $$
+    y = \beta_0 + \beta_1x_1 + \beta_2x_2 + ... + \beta_nx_n + \varepsilon
+    $$
+    - **y**: Output (target)
+    - **x₁, x₂, ..., xₙ**: Input features
+    - **β₀**: Intercept
+    - **β₁, ..., βₙ**: Coefficients
+    - **ε**: Error term
+    ---
+    ### 🧠 How it Works:
+    1. Fit a straight line that minimizes the squared error between predicted and actual values.
+    2. Uses Ordinary Least Squares (OLS) for best-fit line.
+    ---
+    ### 🧮 Loss Function: Mean Squared Error (MSE)
+    $$
+    MSE = \frac{1}{n} \sum_{i=1}^{n}(y_i - \hat{y}_i)^2
+    $$
+    ---
+    ### 📦 Use Cases:
+    - Predicting housing prices
+    - Estimating salaries
+    - Forecasting trends
+    ---
+    ### ✅ Pros:
+    - Simple and fast
+    - Interpretable
+    - Good baseline for regression tasks
+    ### ⚠️ Cons:
+    - Assumes linear relationship
+    - Sensitive to outliers
+    - Doesn't handle multicollinearity well
+    ---
+    ### 📌 Assumptions:
+    - Linearity
+    - Homoscedasticity
+    - Independence
+    - Normality of residuals
+    """)
+with tab2:
+    st.subheader("⚙️ Train Linear Regression Model")
+    target_col = st.selectbox("Select Target Variable", df.columns)
+    feature_cols = st.multiselect("Select Feature Columns", [col for col in df.columns if col != target_col])
+    if feature_cols and target_col:
+        X = df[feature_cols]
+        y = df[target_col]
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+        model = LinearRegression()
+        model.fit(X_train, y_train)
+        y_pred = model.predict(X_test)
+        st.success(f"Model Trained Successfully! ✅")
+        st.metric("R² Score", f"{r2_score(y_test, y_pred):.4f}")
+        st.metric("MSE", f"{mean_squared_error(y_test, y_pred):.4f}")
+        st.markdown("### Coefficients")
+        coef_df = pd.DataFrame({"Feature": feature_cols, "Coefficient": model.coef_})
+        st.dataframe(coef_df)
+with tab3:
+    st.subheader("📈 Actual vs Predicted Plot")
+    if feature_cols and target_col:
+        fig, ax = plt.subplots()
+        sns.scatterplot(x=y_test, y=y_pred, ax=ax)
+        ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
+        ax.set_xlabel("Actual")
+        ax.set_ylabel("Predicted")
+        ax.set_title("Actual vs Predicted")
+        st.pyplot(fig)
+    st.markdown("---")
+    st.markdown("### 💡 Tip:")
+    st.info("If predictions look scattered from the red line, try using non-linear models or transform your features.")