# 2ndproject / app.py
# V8055's picture
# Update app.py
# 8eac679 verified
# app.py
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import io
import base64
def generate_sample_data(n_samples: int = 100, seed: int = 42) -> pd.DataFrame:
    """Build a reproducible synthetic dataset following ``y = 2x + 1`` plus noise.

    ``X`` is drawn uniformly from ``[0, 10)`` and ``y`` adds Gaussian noise
    with standard deviation 2 to the line ``2 * X + 1``.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the private random generator; the same seed always
            yields the same frame.

    Returns:
        A DataFrame with two float columns, ``'X'`` and ``'y'``.
    """
    # A private legacy generator produces exactly the stream that
    # ``np.random.seed(seed)`` followed by ``np.random.rand/randn`` would,
    # but without resetting NumPy's global RNG as a side effect of each call.
    rng = np.random.RandomState(seed)
    X = rng.rand(n_samples, 1) * 10
    y = 2 * X + 1 + rng.randn(n_samples, 1) * 2
    return pd.DataFrame({'X': X.flatten(), 'y': y.flatten()})
def train_model(df, test_size=0.2, random_state=42):
    """Fit a single-feature linear regression on *df* and return the artefacts.

    Args:
        df: DataFrame with a feature column ``'X'`` and a target column ``'y'``.
        test_size: Hold-out proportion (or absolute count) forwarded to
            ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible.

    Returns:
        A dict with the fitted ``'model'``, the split data (``'X_train'``,
        ``'X_test'``, ``'y_train'``, ``'y_test'``) and the model's predictions
        on each split (``'y_train_pred'``, ``'y_test_pred'``).
    """
    # Double brackets keep the feature as a one-column DataFrame (2-D), which
    # is the shape the estimator is fitted on; the target stays a 1-D Series.
    features = df[['X']]
    target = df['y']

    # Hold out part of the data so the model can be scored on unseen rows.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    return {
        'model': model,
        'X_train': X_train, 'X_test': X_test,
        'y_train': y_train, 'y_test': y_test,
        'y_train_pred': model.predict(X_train),
        'y_test_pred': model.predict(X_test),
    }
def plot_regression(results):
    """Draw the train/test scatter and the fitted regression line.

    Args:
        results: Dict holding ``'model'`` (a fitted estimator with
            ``predict``), ``'X_train'``, ``'X_test'``, ``'y_train'`` and
            ``'y_test'``.

    Returns:
        The matplotlib figure. It is created through ``plt.subplots``, so the
        caller should close it once rendered.
    """
    x_train = results['X_train']
    x_test = results['X_test']

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(x_train, results['y_train'],
               color='blue', alpha=0.5, label='Training Data')
    ax.scatter(x_test, results['y_test'],
               color='green', alpha=0.5, label='Test Data')

    # Span the line over the X values actually present instead of a fixed
    # 0..10 window, so data on any scale gets a line covering its points.
    observed = np.concatenate([
        np.asarray(x_train, dtype=float).ravel(),
        np.asarray(x_test, dtype=float).ravel(),
    ])
    observed = observed[np.isfinite(observed)]
    if observed.size:
        x_lo, x_hi = observed.min(), observed.max()
    else:
        # Nothing to measure; fall back to the original fixed window.
        x_lo, x_hi = 0.0, 10.0
    grid = np.linspace(x_lo, x_hi, 100)

    # When the training features are a DataFrame, predict on a frame with the
    # same column labels so scikit-learn's feature-name check stays quiet.
    if isinstance(x_train, pd.DataFrame):
        X_line = pd.DataFrame(grid.reshape(-1, 1), columns=x_train.columns)
    else:
        X_line = grid.reshape(-1, 1)
    y_line = results['model'].predict(X_line)
    ax.plot(grid, y_line, color='red', label='Regression Line')

    ax.set_xlabel('X')
    ax.set_ylabel('y')
    ax.set_title('Linear Regression: Training and Test Data with Regression Line')
    ax.legend()
    ax.grid(True, alpha=0.3)
    return fig
def main():
    """Render the Streamlit page: pick data, fit, report metrics, plot, predict.

    The page returns early (after showing a message) when no file has been
    uploaded yet or when the uploaded file cannot be used.
    """
    st.title("Linear Regression Demo")
    st.write(
        "\n"
        "This app demonstrates simple Linear Regression using scikit-learn.\n"
        "You can either use the sample dataset or upload your own CSV file.\n"
    )

    # Data selection
    data_option = st.radio(
        "Choose data source:",
        ("Use sample data", "Upload CSV file")
    )
    if data_option == "Use sample data":
        df = generate_sample_data()
    else:
        uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
        if uploaded_file is None:
            st.info("Please upload a CSV file")
            return

        # Deliberately broad: any parsing failure on user-supplied data should
        # become an on-page message rather than a traceback.
        try:
            df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error reading file: {e}")
            return

        if len(df.columns) != 2:
            st.error("Please upload a CSV file with exactly 2 columns (X and y)")
            return
        df.columns = ['X', 'y']

        # The regression needs numeric, non-missing values: coerce anything
        # unparsable to NaN and drop those rows, telling the user how many.
        rows_before = len(df)
        df = df.apply(pd.to_numeric, errors='coerce').dropna().reset_index(drop=True)
        dropped = rows_before - len(df)
        if dropped:
            st.warning(f"Ignored {dropped} row(s) with missing or non-numeric values")

        # With the default 20% hold-out, fewer than 6 rows leaves fewer than
        # two test points, for which R² is undefined (and fewer than 2 rows
        # cannot be split at all).
        min_rows = 6
        if len(df) < min_rows:
            st.error(f"Please upload a CSV file with at least {min_rows} rows of numeric data")
            return

    # Display sample of the data
    st.subheader("Data Preview")
    st.write(df.head())

    # Train model and compute metrics on both splits
    results = train_model(df)
    model = results['model']
    train_mse = mean_squared_error(results['y_train'], results['y_train_pred'])
    test_mse = mean_squared_error(results['y_test'], results['y_test_pred'])
    train_r2 = r2_score(results['y_train'], results['y_train_pred'])
    test_r2 = r2_score(results['y_test'], results['y_test_pred'])

    st.subheader("Model Performance Metrics")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Training MSE", f"{train_mse:.4f}")
        st.metric("Training R²", f"{train_r2:.4f}")
    with col2:
        st.metric("Test MSE", f"{test_mse:.4f}")
        st.metric("Test R²", f"{test_r2:.4f}")
    st.write(f"Model Equation: y = {model.coef_[0]:.4f}x + {model.intercept_:.4f}")

    # Plot
    st.subheader("Regression Plot")
    fig = plot_regression(results)
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed;
    # release it so figures do not pile up across script reruns.
    plt.close(fig)

    # Prediction interface
    st.subheader("Make Predictions")
    x_input = st.number_input("Enter a value for X:", value=5.0)
    # Use the same column label the model was fitted with ('X') so that
    # scikit-learn does not warn about missing feature names.
    prediction = model.predict(pd.DataFrame({'X': [x_input]}))[0]
    st.write(f"Predicted y: {prediction:.4f}")
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()