Spaces:
Sleeping
Sleeping
File size: 4,160 Bytes
8eac679 53de850 8eac679 53de850 8eac679 53de850 8eac679 53de850 0b03d9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# app.py
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import io
import base64
def generate_sample_data(n_samples: int = 100, seed: int = 42) -> pd.DataFrame:
    """Build a reproducible synthetic dataset following y = 2x + 1 plus noise.

    X is drawn uniformly from [0, 10) and y is ``2 * X + 1`` with Gaussian
    noise of standard deviation 2 added. With the default arguments the
    values are identical to those produced by seeding the global NumPy
    generator with 42.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the private random generator.

    Returns:
        A DataFrame with float columns ``'X'`` and ``'y'`` and
        ``n_samples`` rows.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by the module-level functions, but does not reseed the
    # process-wide generator that other code may rely on.
    rng = np.random.RandomState(seed)
    X = rng.rand(n_samples, 1) * 10
    y = 2 * X + 1 + rng.randn(n_samples, 1) * 2
    return pd.DataFrame({'X': X.flatten(), 'y': y.flatten()})
def train_model(df):
    """Fit a linear regression of column ``'y'`` on column ``'X'``.

    The frame is split 80/20 into train and test partitions with a fixed
    ``random_state`` of 42, a ``LinearRegression`` is fitted on the training
    partition, and predictions are computed for both partitions.

    Args:
        df: DataFrame containing the columns ``'X'`` and ``'y'``.

    Returns:
        A dict with the keys ``'model'``, ``'X_train'``, ``'X_test'``,
        ``'y_train'``, ``'y_test'``, ``'y_train_pred'`` and ``'y_test_pred'``.
    """
    # Double brackets keep the feature as a one-column DataFrame.
    features, target = df[['X']], df['y']

    train_X, test_X, train_y, test_y = train_test_split(
        features,
        target,
        test_size=0.2,
        random_state=42,
    )

    # fit() returns the estimator itself, so creation and training chain.
    fitted = LinearRegression().fit(train_X, train_y)

    outcome = {'model': fitted}
    outcome.update(
        X_train=train_X,
        X_test=test_X,
        y_train=train_y,
        y_test=test_y,
    )
    outcome['y_train_pred'] = fitted.predict(train_X)
    outcome['y_test_pred'] = fitted.predict(test_X)
    return outcome
def plot_regression(results):
    """Draw the train/test scatter plot with the fitted regression line.

    Args:
        results: Mapping with the keys ``'model'``, ``'X_train'``,
            ``'X_test'``, ``'y_train'`` and ``'y_test'`` (the structure
            returned by ``train_model``).

    Returns:
        The matplotlib ``Figure`` holding the plot. The caller is
        responsible for closing it.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot training data
    ax.scatter(results['X_train'], results['y_train'],
               color='blue', alpha=0.5, label='Training Data')
    # Plot test data
    ax.scatter(results['X_test'], results['y_test'],
               color='green', alpha=0.5, label='Test Data')

    # Span the regression line over the range actually present in the data
    # rather than a fixed [0, 10], so uploaded datasets are covered too.
    x_train = results['X_train']
    x_values = np.concatenate([
        np.asarray(x_train, dtype=float).ravel(),
        np.asarray(results['X_test'], dtype=float).ravel(),
    ])
    x_min, x_max = x_values.min(), x_values.max()
    if x_min == x_max:
        # Degenerate range: widen it so the line is still visible.
        x_min, x_max = x_min - 0.5, x_max + 0.5
    X_line = np.linspace(x_min, x_max, 100).reshape(-1, 1)

    # When the model was fitted on a DataFrame, predict on a DataFrame with
    # the same column names to avoid scikit-learn's feature-name warning.
    if isinstance(x_train, pd.DataFrame):
        line_input = pd.DataFrame(X_line, columns=x_train.columns)
    else:
        line_input = X_line
    y_line = results['model'].predict(line_input)

    # Plot regression line
    ax.plot(X_line.ravel(), np.ravel(y_line), color='red',
            label='Regression Line')

    ax.set_xlabel('X')
    ax.set_ylabel('y')
    ax.set_title('Linear Regression: Training and Test Data with Regression Line')
    ax.legend()
    ax.grid(True, alpha=0.3)
    return fig
def main():
    """Render the Streamlit linear-regression demo page.

    Lets the user pick the built-in sample data or upload a two-column CSV,
    trains a linear regression on it, and shows a data preview, MSE and R²
    for the train and test splits, the fitted equation, a regression plot,
    and a single-value prediction widget. Returns early (after showing a
    message) when no usable data is available.
    """
    st.title("Linear Regression Demo")
    st.write("""
This app demonstrates simple Linear Regression using scikit-learn.
You can either use the sample dataset or upload your own CSV file.
""")

    # Data selection
    data_option = st.radio(
        "Choose data source:",
        ("Use sample data", "Upload CSV file")
    )

    if data_option == "Use sample data":
        df = generate_sample_data()
    else:
        uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
        if uploaded_file is None:
            st.info("Please upload a CSV file")
            return

        # Only the parse itself is guarded; this is the UI boundary, so any
        # reader failure is reported to the user instead of crashing the app.
        try:
            df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error reading file: {e}")
            return

        if len(df.columns) != 2:
            st.error("Please upload a CSV file with exactly 2 columns (X and y)")
            return
        df.columns = ['X', 'y']

        # Non-numeric or missing cells would make model fitting raise, so
        # coerce them to NaN and drop the affected rows up front.
        total_rows = len(df)
        df = df.apply(pd.to_numeric, errors='coerce').dropna()
        dropped_rows = total_rows - len(df)
        if dropped_rows:
            st.warning(
                f"Ignored {dropped_rows} row(s) with missing or non-numeric values"
            )

        # With a 20% test split, fewer than 6 rows leave fewer than two test
        # samples, for which R² is not well defined.
        min_rows = 6
        if len(df) < min_rows:
            st.error(
                f"Please upload a CSV file with at least {min_rows} numeric rows"
            )
            return

    # Display sample of the data
    st.subheader("Data Preview")
    st.write(df.head())

    # Train model and display results
    results = train_model(df)
    model = results['model']

    # Model metrics
    train_mse = mean_squared_error(results['y_train'], results['y_train_pred'])
    test_mse = mean_squared_error(results['y_test'], results['y_test_pred'])
    train_r2 = r2_score(results['y_train'], results['y_train_pred'])
    test_r2 = r2_score(results['y_test'], results['y_test_pred'])

    st.subheader("Model Performance Metrics")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Training MSE", f"{train_mse:.4f}")
        st.metric("Training R²", f"{train_r2:.4f}")
    with col2:
        st.metric("Test MSE", f"{test_mse:.4f}")
        st.metric("Test R²", f"{test_r2:.4f}")

    # Render the sign separately so a negative intercept reads "- 1.2"
    # instead of "+ -1.2".
    slope = model.coef_[0]
    intercept = model.intercept_
    sign = '+' if intercept >= 0 else '-'
    st.write(f"Model Equation: y = {slope:.4f}x {sign} {abs(intercept):.4f}")

    # Plot
    st.subheader("Regression Plot")
    fig = plot_regression(results)
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates; close this one so
    # figures do not pile up across Streamlit reruns.
    plt.close(fig)

    # Prediction interface
    st.subheader("Make Predictions")
    x_input = st.number_input("Enter a value for X:", value=5.0)
    # The model was fitted on a DataFrame with column 'X'; predicting on a
    # matching DataFrame avoids scikit-learn's feature-name warning.
    prediction = model.predict(pd.DataFrame({'X': [x_input]}))[0]
    st.write(f"Predicted y: {prediction:.4f}")
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()
|