"""Interactive Streamlit visualizations of three basic machine learning models.

The page rendered by :func:`show` has one tab each for linear regression,
logistic regression and K-Means clustering, all fitted on synthetic data.
"""

import streamlit as st
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs

# Seed shared by every synthetic data set so reruns show the same points.
_SEED = 42

# Background colour applied to both the plot area and the surrounding paper.
_DARK_BACKGROUND = '#1E1E1E'


def generate_regression_data(n_samples: int = 100, noise: float = 10) -> tuple:
    """Generate data for linear regression visualization.

    Args:
        n_samples: Number of evenly spaced points on the interval [0, 10].
        noise: Standard deviation of the Gaussian noise added to the target.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 1)`` and
        ``y = 2 * X + 1 + noise`` has shape ``(n_samples,)``.
    """
    # A local RandomState draws the same numbers as seeding the global
    # generator would, without resetting process-wide NumPy random state.
    rng = np.random.RandomState(_SEED)
    X = np.linspace(0, 10, n_samples)
    y = 2 * X + 1 + rng.normal(0, noise, n_samples)
    return X.reshape(-1, 1), y


def generate_classification_data(n_samples: int = 100) -> tuple:
    """Generate data for logistic regression visualization.

    Args:
        n_samples: Number of two-feature points to draw.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 2)`` drawn from
        a standard normal distribution and ``y`` is 1 where the two features
        sum to more than zero, else 0.
    """
    # Local generator: same stream as a global seed, no global side effect.
    rng = np.random.RandomState(_SEED)
    X = rng.randn(n_samples, 2)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)
    return X, y


def generate_clustering_data(n_samples: int = 300) -> np.ndarray:
    """Generate data for clustering visualization.

    Args:
        n_samples: Total number of points spread over three blobs.

    Returns:
        The feature matrix produced by ``make_blobs``; the blob labels are
        discarded because clustering is unsupervised.
    """
    # Passing the seed to make_blobs keeps the data reproducible without
    # reseeding NumPy's global generator.
    X, _ = make_blobs(
        n_samples=n_samples,
        centers=3,
        cluster_std=1.5,
        random_state=_SEED,
    )
    return X


def _apply_dark_theme(fig) -> None:
    """Apply the shared dark background and white font to a Plotly figure."""
    fig.update_layout(
        plot_bgcolor=_DARK_BACKGROUND,
        paper_bgcolor=_DARK_BACKGROUND,
        font=dict(color='white')
    )


def _render_linear_regression_tab() -> None:
    """Render the sliders, fitted line and summary for linear regression."""
    st.subheader("Linear Regression")

    # Interactive controls
    col1, col2 = st.columns(2)
    with col1:
        n_samples = st.slider("Number of samples", 50, 200, 100)
    with col2:
        noise = st.slider("Noise level", 1, 20, 10)

    # Generate and plot data
    X, y = generate_regression_data(n_samples, noise)

    # Create scatter plot with dark theme
    fig = px.scatter(x=X.flatten(), y=y,
                     title="Linear Regression Visualization",
                     labels={'x': 'Feature (X)', 'y': 'Target (y)'},
                     template="plotly_dark")

    # Add regression line
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)

    fig.add_trace(go.Scatter(x=X.flatten(), y=y_pred,
                             mode='lines', name='Regression Line',
                             line=dict(color='red')))

    _apply_dark_theme(fig)
    st.plotly_chart(fig, use_container_width=True)

    # Display model information
    st.success(f"""
    **Model Information**
    - Slope (Coefficient): {model.coef_[0]:.2f}
    - Intercept: {model.intercept_:.2f}
    - R² Score: {model.score(X, y):.2f}
    """)


def _render_logistic_regression_tab() -> None:
    """Render the scatter plot, decision regions and summary for logistic regression."""
    st.subheader("Logistic Regression")

    # Generate data
    X, y = generate_classification_data()

    # Create scatter plot with dark theme
    fig = px.scatter(x=X[:, 0], y=X[:, 1], color=y.astype(str),
                     title="Logistic Regression Visualization",
                     labels={'x': 'Feature 1', 'y': 'Feature 2'},
                     template="plotly_dark")

    # Add decision boundary
    model = LogisticRegression()
    model.fit(X, y)

    # Create mesh grid for decision boundary: one unit of padding around the
    # data, sampled every 0.1 along both features.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))

    # Predict a class for every grid point, then restore the grid shape so
    # the contour trace can draw the two predicted regions.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    fig.add_trace(go.Contour(x=xx[0], y=yy[:, 0], z=Z,
                             showscale=False, opacity=0.3,
                             colorscale='RdBu'))

    _apply_dark_theme(fig)
    st.plotly_chart(fig, use_container_width=True)

    # Display model information
    st.success(f"""
    **Model Information**
    - Accuracy: {model.score(X, y):.2f}
    - Coefficients: [{model.coef_[0][0]:.2f}, {model.coef_[0][1]:.2f}]
    - Intercept: {model.intercept_[0]:.2f}
    """)


def _render_clustering_tab() -> None:
    """Render the cluster-count slider, clustered scatter plot and summary."""
    st.subheader("K-Means Clustering")

    # Interactive controls
    n_clusters = st.slider("Number of clusters", 2, 6, 3)

    # Generate data
    X = generate_clustering_data()

    # Perform clustering. n_init is given explicitly because its default
    # differs between scikit-learn releases; pinning it keeps the result
    # independent of the installed version.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=_SEED)
    clusters = kmeans.fit_predict(X)

    # Create scatter plot with dark theme
    fig = px.scatter(x=X[:, 0], y=X[:, 1], color=clusters.astype(str),
                     title="K-Means Clustering Visualization",
                     labels={'x': 'Feature 1', 'y': 'Feature 2'},
                     template="plotly_dark")

    # Add cluster centers
    fig.add_trace(go.Scatter(x=kmeans.cluster_centers_[:, 0],
                             y=kmeans.cluster_centers_[:, 1],
                             mode='markers',
                             marker=dict(size=12, symbol='star', color='white'),
                             name='Cluster Centers'))

    _apply_dark_theme(fig)
    st.plotly_chart(fig, use_container_width=True)

    # Display clustering information
    st.success(f"""
    **Clustering Information**
    - Number of Clusters: {n_clusters}
    - Inertia (Sum of Squared Distances): {kmeans.inertia_:.2f}
    """)


def show():
    """Display the interactive machine learning visualizations.

    Renders a title, an introduction, one tab per model (linear regression,
    logistic regression, K-Means clustering) and a closing summary.
    """
    st.title("Interactive Machine Learning Visualizations")

    # Introduction
    st.info("""
    This module provides interactive visualizations of three fundamental machine learning concepts:
    - 📈 Linear Regression: Predict continuous values
    - 🎯 Logistic Regression: Classify binary outcomes
    - 🔍 K-Means Clustering: Group similar data points
    """)

    # Create tabs for different ML concepts
    tab1, tab2, tab3 = st.tabs(["📈 Linear Regression", "🎯 Logistic Regression", "🔍 Clustering"])

    with tab1:
        _render_linear_regression_tab()

    with tab2:
        _render_logistic_regression_tab()

    with tab3:
        _render_clustering_tab()

    # Footer
    st.info("""
    **Key Takeaways**
    - Linear Regression: Fits a line to predict continuous values
    - Logistic Regression: Creates a decision boundary for classification
    - K-Means Clustering: Groups similar data points into clusters
    """)