# app.py
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import io
import base64

def generate_sample_data():
    """Return a reproducible synthetic dataset for the demo.

    The data follows ``y = 2 * X + 1`` plus Gaussian noise (standard
    deviation 2), with 100 values of ``X`` drawn uniformly from [0, 10).

    Returns:
        pandas.DataFrame with 100 rows and two float columns, ``'X'`` and
        ``'y'``.
    """
    # A private generator seeded with 42 produces the same stream as seeding
    # the global NumPy RNG would, but leaves the process-wide random state
    # untouched for any other code that relies on it.
    rng = np.random.RandomState(42)
    X = rng.rand(100, 1) * 10
    y = 2 * X + 1 + rng.randn(100, 1) * 2
    return pd.DataFrame({'X': X.flatten(), 'y': y.flatten()})

def train_model(df):
    """Fit a linear regression of column ``'y'`` on column ``'X'``.

    The rows are split 80/20 into training and test sets (fixed
    ``random_state=42``), a ``LinearRegression`` is fit on the training part,
    and predictions are computed for both parts.

    Args:
        df: DataFrame containing the columns ``'X'`` and ``'y'``.

    Returns:
        dict with the fitted estimator under ``'model'``, the split data
        under ``'X_train'``, ``'X_test'``, ``'y_train'``, ``'y_test'``, and
        the predictions under ``'y_train_pred'`` and ``'y_test_pred'``.
    """
    # Double brackets keep the features two-dimensional (a one-column frame).
    features = df[['X']]
    target = df['y']

    feat_train, feat_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    regressor = LinearRegression()
    regressor.fit(feat_train, target_train)

    outcome = {'model': regressor}
    outcome['X_train'] = feat_train
    outcome['X_test'] = feat_test
    outcome['y_train'] = target_train
    outcome['y_test'] = target_test
    outcome['y_train_pred'] = regressor.predict(feat_train)
    outcome['y_test_pred'] = regressor.predict(feat_test)
    return outcome

def plot_regression(results):
    """Draw the training/test points and the fitted regression line.

    Args:
        results: Mapping with the keys ``'model'``, ``'X_train'``,
            ``'X_test'``, ``'y_train'`` and ``'y_test'`` (the structure
            produced by ``train_model``).

    Returns:
        The matplotlib Figure containing the plot. The caller owns the figure
        and is responsible for closing it.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot training data
    ax.scatter(results['X_train'], results['y_train'],
               color='blue', alpha=0.5, label='Training Data')
    # Plot test data
    ax.scatter(results['X_test'], results['y_test'],
               color='green', alpha=0.5, label='Test Data')

    # Span the regression line over the X range actually present in the data
    # so it also covers uploaded datasets that do not lie within [0, 10].
    x_observed = np.concatenate([
        np.asarray(results['X_train'], dtype=float).ravel(),
        np.asarray(results['X_test'], dtype=float).ravel(),
    ])
    if x_observed.size:
        x_min, x_max = x_observed.min(), x_observed.max()
    else:
        # No points to derive a range from; fall back to the demo range.
        x_min, x_max = 0.0, 10.0
    x_line = np.linspace(x_min, x_max, 100)

    # When the model was fit on a DataFrame, predict on a frame with the same
    # column labels; scikit-learn otherwise warns about missing feature names.
    feature_columns = getattr(results['X_train'], 'columns', None)
    if feature_columns is not None:
        X_line = pd.DataFrame(x_line.reshape(-1, 1), columns=feature_columns)
    else:
        X_line = x_line.reshape(-1, 1)
    y_line = results['model'].predict(X_line)
    ax.plot(x_line, y_line, color='red', label='Regression Line')

    ax.set_xlabel('X')
    ax.set_ylabel('y')
    ax.set_title('Linear Regression: Training and Test Data with Regression Line')
    ax.legend()
    ax.grid(True, alpha=0.3)

    return fig

# train_model holds out 20% of the rows for testing. With fewer than 6 rows
# the held-out part has at most one sample, for which R² is not well defined.
_MIN_ROWS = 6


def _coerce_numeric(df):
    """Return ``(clean_df, dropped)``: numeric-only rows and the count removed.

    Every column is converted with ``pd.to_numeric``; values that cannot be
    parsed become NaN, and rows containing any NaN are discarded.
    """
    numeric = df.apply(pd.to_numeric, errors='coerce')
    clean = numeric.dropna().reset_index(drop=True)
    return clean, len(numeric) - len(clean)


def main():
    """Render the Streamlit page: pick data, fit the model, show the results.

    The page lets the user choose the built-in sample data or upload a
    two-column CSV, then shows a data preview, train/test MSE and R², the
    fitted equation, a regression plot, and a single-value prediction widget.
    Invalid or missing uploads end the run early with a message in the UI.
    """
    st.title("Linear Regression Demo")
    st.write("""
    This app demonstrates simple Linear Regression using scikit-learn.
    You can either use the sample dataset or upload your own CSV file.
    """)

    # Data selection
    data_option = st.radio(
        "Choose data source:",
        ("Use sample data", "Upload CSV file")
    )

    if data_option == "Use sample data":
        df = generate_sample_data()
    else:
        uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
        if uploaded_file is None:
            st.info("Please upload a CSV file")
            return

        # UI boundary: any parsing failure is reported to the user instead of
        # surfacing as a traceback. Only the read itself is guarded.
        try:
            df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error reading file: {e}")
            return

        if len(df.columns) != 2:
            st.error("Please upload a CSV file with exactly 2 columns (X and y)")
            return
        df.columns = ['X', 'y']

        # Text or missing cells would otherwise make model fitting fail.
        df, dropped = _coerce_numeric(df)
        if dropped:
            st.warning(
                f"Ignored {dropped} row(s) with missing or non-numeric values"
            )
        if len(df) < _MIN_ROWS:
            st.error(
                f"Need at least {_MIN_ROWS} rows of numeric data to train "
                f"and evaluate the model (found {len(df)})"
            )
            return

    # Display sample of the data
    st.subheader("Data Preview")
    st.write(df.head())

    # Train model and display results
    results = train_model(df)
    model = results['model']

    # Model metrics
    train_mse = mean_squared_error(results['y_train'], results['y_train_pred'])
    test_mse = mean_squared_error(results['y_test'], results['y_test_pred'])
    train_r2 = r2_score(results['y_train'], results['y_train_pred'])
    test_r2 = r2_score(results['y_test'], results['y_test_pred'])

    st.subheader("Model Performance Metrics")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Training MSE", f"{train_mse:.4f}")
        st.metric("Training R²", f"{train_r2:.4f}")
    with col2:
        st.metric("Test MSE", f"{test_mse:.4f}")
        st.metric("Test R²", f"{test_r2:.4f}")

    st.write(f"Model Equation: y = {model.coef_[0]:.4f}x + {model.intercept_:.4f}")

    # Plot
    st.subheader("Regression Plot")
    fig = plot_regression(results)
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; close the figure
    # so pyplot does not keep one more open figure per rerun.
    plt.close(fig)

    # Prediction interface
    st.subheader("Make Predictions")
    x_input = st.number_input("Enter a value for X:", value=5.0)
    # Predict on a frame labelled like the training features ('X') so
    # scikit-learn does not warn about missing feature names.
    prediction = model.predict(pd.DataFrame({'X': [x_input]}))[0]
    st.write(f"Predicted y: {prediction:.4f}")

# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()