# app.py
"""Streamlit demo: fit a one-feature linear regression and explore it.

The app trains scikit-learn's ``LinearRegression`` on either a generated
sample dataset or a user-uploaded two-column CSV, reports MSE and R² on a
train/test split, plots the fit, and offers a single-value prediction box.
"""

import base64
import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Smallest uploaded dataset the app accepts. train_model holds out 20% of the
# rows, so 10 rows leave 2 test rows; R² is not well defined on fewer than
# two samples.
MIN_ROWS = 10


def generate_sample_data():
    """Return a reproducible 100-row DataFrame with columns 'X' and 'y'.

    X is uniform on [0, 10) and y = 2 * X + 1 plus Gaussian noise with
    standard deviation 2. The NumPy global seed is fixed at 42 so every
    rerun of the app sees the same sample.
    """
    np.random.seed(42)
    X = np.random.rand(100, 1) * 10
    y = 2 * X + 1 + np.random.randn(100, 1) * 2
    return pd.DataFrame({'X': X.flatten(), 'y': y.flatten()})


def train_model(df):
    """Fit a linear regression of 'y' on 'X' using an 80/20 train/test split.

    Args:
        df: DataFrame with numeric columns 'X' and 'y'.

    Returns:
        A dict with the fitted 'model', the split data ('X_train', 'X_test',
        'y_train', 'y_test') and the predictions on each split
        ('y_train_pred', 'y_test_pred').
    """
    X = df[['X']]
    y = df['y']

    # Fixed random_state keeps the split (and therefore the metrics) stable
    # across Streamlit reruns.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    return {
        'model': model,
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
        'y_train_pred': y_train_pred,
        'y_test_pred': y_test_pred,
    }


def _as_model_input(model, values):
    """Shape *values* as single-feature input matching how *model* was fit.

    A model fit on a DataFrame records its column names in
    ``feature_names_in_`` and warns when later given unnamed input, so the
    values are wrapped in a DataFrame with those names when they exist.
    """
    array = np.asarray(values, dtype=float).reshape(-1, 1)
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is None:
        return array
    return pd.DataFrame(array, columns=feature_names)


def plot_regression(results):
    """Plot the train/test points and the fitted regression line.

    Args:
        results: The dict returned by ``train_model``.

    Returns:
        The Matplotlib figure. The caller owns it and should close it once
        it has been rendered.
    """
    model = results['model']
    fig, ax = plt.subplots(figsize=(10, 6))

    ax.scatter(results['X_train'], results['y_train'],
               color='blue', alpha=0.5, label='Training Data')
    ax.scatter(results['X_test'], results['y_test'],
               color='green', alpha=0.5, label='Test Data')

    # Draw the line across the X range actually present in the data, so
    # uploaded datasets outside [0, 10] are still covered.
    x_seen = np.concatenate([
        np.asarray(results['X_train'], dtype=float).ravel(),
        np.asarray(results['X_test'], dtype=float).ravel(),
    ])
    x_line = np.linspace(x_seen.min(), x_seen.max(), 100)
    y_line = np.asarray(model.predict(_as_model_input(model, x_line))).ravel()
    ax.plot(x_line, y_line, color='red', label='Regression Line')

    ax.set_xlabel('X')
    ax.set_ylabel('y')
    ax.set_title('Linear Regression: Training and Test Data with Regression Line')
    ax.legend()
    ax.grid(True, alpha=0.3)

    return fig


def _clean_uploaded_data(raw_df):
    """Validate an uploaded table and return it as numeric 'X'/'y' columns.

    Cells that cannot be parsed as numbers become NaN, and rows containing
    any NaN are dropped.

    Raises:
        ValueError: With a user-facing message, if the table does not have
            exactly two columns or has fewer than ``MIN_ROWS`` usable rows.
    """
    if len(raw_df.columns) != 2:
        raise ValueError(
            "Please upload a CSV file with exactly 2 columns (X and y)"
        )

    df = raw_df.copy()
    df.columns = ['X', 'y']
    df = df.apply(pd.to_numeric, errors='coerce').dropna()

    if len(df) < MIN_ROWS:
        raise ValueError(
            f"Need at least {MIN_ROWS} rows of numeric data, "
            f"but only {len(df)} usable row(s) were found"
        )
    return df


def main():
    """Render the app: pick data, train, show metrics and plot, predict."""
    st.title("Linear Regression Demo")
    st.write("""
    This app demonstrates simple Linear Regression using scikit-learn.
    You can either use the sample dataset or upload your own CSV file.
    """)

    # Data selection
    data_option = st.radio(
        "Choose data source:",
        ("Use sample data", "Upload CSV file")
    )

    if data_option == "Use sample data":
        df = generate_sample_data()
    else:
        uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
        if uploaded_file is None:
            st.info("Please upload a CSV file")
            return

        # Broad catch is deliberate at this UI boundary: any parse failure
        # should become an on-page message rather than a traceback.
        try:
            raw_df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error reading file: {str(e)}")
            return

        try:
            df = _clean_uploaded_data(raw_df)
        except ValueError as e:
            st.error(str(e))
            return

        dropped = len(raw_df) - len(df)
        if dropped:
            st.warning(
                f"Ignored {dropped} row(s) with missing or non-numeric values."
            )

    # Display sample of the data
    st.subheader("Data Preview")
    st.write(df.head())

    # Train model and display results
    results = train_model(df)
    model = results['model']

    # Model metrics
    train_mse = mean_squared_error(results['y_train'], results['y_train_pred'])
    test_mse = mean_squared_error(results['y_test'], results['y_test_pred'])
    train_r2 = r2_score(results['y_train'], results['y_train_pred'])
    test_r2 = r2_score(results['y_test'], results['y_test_pred'])

    st.subheader("Model Performance Metrics")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Training MSE", f"{train_mse:.4f}")
        st.metric("Training R²", f"{train_r2:.4f}")
    with col2:
        st.metric("Test MSE", f"{test_mse:.4f}")
        st.metric("Test R²", f"{test_r2:.4f}")

    st.write(f"Model Equation: y = {model.coef_[0]:.4f}x + {model.intercept_:.4f}")

    # Plot
    st.subheader("Regression Plot")
    fig = plot_regression(results)
    st.pyplot(fig)
    # Streamlit reruns this script on every interaction; close the figure so
    # pyplot does not keep one alive per rerun.
    plt.close(fig)

    # Prediction interface
    st.subheader("Make Predictions")
    x_input = st.number_input("Enter a value for X:", value=5.0)
    prediction = model.predict(_as_model_input(model, [[x_input]]))[0]
    st.write(f"Predicted y: {prediction:.4f}")


if __name__ == "__main__":
    main()