# NOTE: the lines below are page residue from a web file viewer, kept as
# comments so that the module parses as Python.
# Spaces: Sleeping
# File size: 6,695 Bytes
# Revision hashes listed by the viewer: 78b86a9, a7c26a1, dfe5163, fa60705
# (The viewer's line-number gutter, 1-186, has been omitted.)
import streamlit as st
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
def generate_regression_data(n_samples=100, noise=10, seed=42):
    """Generate noisy samples of the line ``y = 2x + 1`` for the regression demo.

    Args:
        n_samples: Number of evenly spaced points placed on ``[0, 10]``.
        noise: Standard deviation of the Gaussian noise added to ``y``.
        seed: Seed for the private random generator. The default of 42
            reproduces the values this function returned when it seeded
            the global generator with 42.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 1)`` and
        ``y`` has shape ``(n_samples,)``.
    """
    # A private RandomState draws the same stream that
    # ``np.random.seed(seed)`` + ``np.random.normal`` would, but it does not
    # reseed the process-wide generator that unrelated code may rely on.
    rng = np.random.RandomState(seed)
    X = np.linspace(0, 10, n_samples)
    y = 2 * X + 1 + rng.normal(0, noise, n_samples)
    return X.reshape(-1, 1), y
def generate_classification_data(n_samples=100, seed=42):
    """Generate a linearly separable two-feature dataset for the classifier demo.

    Args:
        n_samples: Number of points to draw.
        seed: Seed for the private random generator. The default of 42
            reproduces the values this function returned when it seeded
            the global generator with 42.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 2)`` with
        standard-normal entries, and ``y`` is an integer array of 0/1 labels
        that is 1 exactly where ``X[:, 0] + X[:, 1] > 0``.
    """
    # Use a private generator so that calling this function does not reseed
    # the process-wide NumPy random state as a side effect.
    rng = np.random.RandomState(seed)
    X = rng.randn(n_samples, 2)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)
    return X, y
def generate_clustering_data(n_samples=300, seed=42):
    """Generate three Gaussian blobs for the clustering demo.

    Args:
        n_samples: Total number of points to draw across the blobs.
        seed: Value passed to ``make_blobs`` as ``random_state``, making the
            data reproducible without touching the global NumPy generator.

    Returns:
        The feature array produced by ``make_blobs``; the blob labels are
        discarded because the demo clusters the points itself.
    """
    # Seeding through ``random_state`` replaces the earlier
    # ``np.random.seed(42)`` call, which reseeded the process-wide generator
    # purely so that ``make_blobs`` would pick it up implicitly.
    X, _ = make_blobs(
        n_samples=n_samples,
        centers=3,
        cluster_std=1.5,
        random_state=seed,
    )
    return X
def _apply_dark_theme(fig):
    """Give *fig* the dark background and white font shared by every chart."""
    fig.update_layout(
        plot_bgcolor='#1E1E1E',
        paper_bgcolor='#1E1E1E',
        font=dict(color='white'),
    )
    return fig


def _render_linear_regression_tab():
    """Draw the linear-regression controls, scatter plot, fit line and summary."""
    st.subheader("Linear Regression")

    # Interactive controls
    col1, col2 = st.columns(2)
    with col1:
        n_samples = st.slider("Number of samples", 50, 200, 100)
    with col2:
        noise = st.slider("Noise level", 1, 20, 10)

    # Generate data and fit the model
    X, y = generate_regression_data(n_samples, noise)
    model = LinearRegression()
    model.fit(X, y)

    # Scatter of the raw samples with the fitted line drawn on top
    fig = px.scatter(
        x=X.flatten(),
        y=y,
        title="Linear Regression Visualization",
        labels={'x': 'Feature (X)', 'y': 'Target (y)'},
        template="plotly_dark",
    )
    fig.add_trace(
        go.Scatter(
            x=X.flatten(),
            y=model.predict(X),
            mode='lines',
            name='Regression Line',
            line=dict(color='red'),
        )
    )
    _apply_dark_theme(fig)
    st.plotly_chart(fig, use_container_width=True)

    # Display model information
    st.success(
        "\n"
        "**Model Information**\n"
        f"- Slope (Coefficient): {model.coef_[0]:.2f}\n"
        f"- Intercept: {model.intercept_:.2f}\n"
        f"- R² Score: {model.score(X, y):.2f}\n"
    )


def _render_logistic_regression_tab():
    """Draw the classification scatter plot, decision regions and summary."""
    st.subheader("Logistic Regression")

    # Generate data and fit the model
    X, y = generate_classification_data()
    model = LogisticRegression()
    model.fit(X, y)

    fig = px.scatter(
        x=X[:, 0],
        y=X[:, 1],
        # String labels make Plotly treat the classes as discrete categories.
        color=y.astype(str),
        title="Logistic Regression Visualization",
        labels={'x': 'Feature 1', 'y': 'Feature 2'},
        template="plotly_dark",
    )

    # Predict on a grid that extends one unit past the data in each direction;
    # the contour of those predictions shows the decision regions.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, 0.1),
        np.arange(y_min, y_max, 0.1),
    )
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    fig.add_trace(
        go.Contour(
            x=xx[0],
            y=yy[:, 0],
            z=Z,
            showscale=False,
            opacity=0.3,
            colorscale='RdBu',
        )
    )
    _apply_dark_theme(fig)
    st.plotly_chart(fig, use_container_width=True)

    # Display model information
    st.success(
        "\n"
        "**Model Information**\n"
        f"- Accuracy: {model.score(X, y):.2f}\n"
        f"- Coefficients: [{model.coef_[0][0]:.2f}, {model.coef_[0][1]:.2f}]\n"
        f"- Intercept: {model.intercept_[0]:.2f}\n"
    )


def _render_clustering_tab():
    """Draw the cluster-count control, clustered scatter plot and summary."""
    st.subheader("K-Means Clustering")

    # Interactive controls
    n_clusters = st.slider("Number of clusters", 2, 6, 3)

    # Generate data and cluster it
    X = generate_clustering_data()
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(X)

    fig = px.scatter(
        x=X[:, 0],
        y=X[:, 1],
        # String labels make Plotly treat the cluster ids as categories.
        color=clusters.astype(str),
        title="K-Means Clustering Visualization",
        labels={'x': 'Feature 1', 'y': 'Feature 2'},
        template="plotly_dark",
    )
    # Mark the fitted cluster centers
    fig.add_trace(
        go.Scatter(
            x=kmeans.cluster_centers_[:, 0],
            y=kmeans.cluster_centers_[:, 1],
            mode='markers',
            marker=dict(size=12, symbol='star', color='white'),
            name='Cluster Centers',
        )
    )
    _apply_dark_theme(fig)
    st.plotly_chart(fig, use_container_width=True)

    # Display clustering information
    st.success(
        "\n"
        "**Clustering Information**\n"
        f"- Number of Clusters: {n_clusters}\n"
        f"- Inertia (Sum of Squared Distances): {kmeans.inertia_:.2f}\n"
    )


def show():
    """Display the interactive machine learning visualizations.

    Renders a title, an introduction, one tab each for linear regression,
    logistic regression and K-Means clustering, and a closing summary.
    Takes no arguments and returns nothing; all output goes through Streamlit.
    """
    st.title("Interactive Machine Learning Visualizations")

    # NOTE(review): the emoji in these labels were mis-encoded in the file.
    # The target emoji is recoverable from the surviving bytes; the chart
    # emoji for Linear Regression and Clustering are best guesses. Confirm
    # them against the intended icons.
    # Message text is assembled line by line so that it does not depend on
    # how this source file is indented.

    # Introduction
    st.info(
        "\n"
        "This module provides interactive visualizations of three fundamental machine learning concepts:\n"
        "- 📈 Linear Regression: Predict continuous values\n"
        "- 🎯 Logistic Regression: Classify binary outcomes\n"
        "- 📊 K-Means Clustering: Group similar data points\n"
    )

    # Create tabs for different ML concepts
    tab1, tab2, tab3 = st.tabs(
        ["📈 Linear Regression", "🎯 Logistic Regression", "📊 Clustering"]
    )
    with tab1:
        _render_linear_regression_tab()
    with tab2:
        _render_logistic_regression_tab()
    with tab3:
        _render_clustering_tab()

    # Footer
    st.info(
        "\n"
        "**Key Takeaways**\n"
        "- Linear Regression: Fits a line to predict continuous values\n"
        "- Logistic Regression: Creates a decision boundary for classification\n"
        "- K-Means Clustering: Groups similar data points into clusters\n"
    )