# Visualizing-ML / gd_sgd_app.py
# Streamlit demo: gradient descent, stochastic gradient descent, and
# simulated annealing visualized on 3-D convex / non-convex surfaces.
import streamlit as st
import numpy as np
import plotly.graph_objs as go
def convex_function(x, y):
    """Convex bowl f(x, y) = x^2 + y^2 (scalar or numpy-array inputs)."""
    return x * x + y * y
def non_convex_function(x, y):
    """Oscillatory non-convex surface f(x, y) = sin(x) * cos(y) * x * y."""
    wave = np.sin(x) * np.cos(y)
    return wave * x * y
def gradient_descent(func, grad_func, start, learning_rate, n_iter):
    """Run deterministic gradient descent and return every visited point.

    ``func`` is accepted only for signature parity with the other optimizers
    in this file; the updates are driven entirely by ``grad_func``.

    Returns an (n_iter + 1, 2) array: the start point followed by one row
    per update step.
    """
    current = start
    visited = [current]
    for _ in range(n_iter):
        # Standard update rule: x <- x - alpha * grad f(x).
        current = current - learning_rate * grad_func(current)
        visited.append(current)
    return np.array(visited)
def stochastic_gradient_descent(func, grad_func, start, learning_rate, n_iter):
    """Gradient descent with Gaussian noise (sigma = 0.1) added to each gradient.

    ``func`` is unused here (kept for signature parity with gradient_descent).
    Returns an (n_iter + 1, 2) array of all visited points, start included.
    """
    point = start
    trajectory = [point]
    for _ in range(n_iter):
        # Noisy gradient estimate: true gradient plus a small random perturbation.
        noisy_grad = grad_func(point) + np.random.normal(0, 0.1, 2)
        point = point - learning_rate * noisy_grad
        trajectory.append(point)
    return np.array(trajectory)
def grad_convex(point):
    """Analytic gradient of the convex bowl x^2 + y^2 at ``point``: (2x, 2y)."""
    x, y = point
    return np.array([x + x, y + y])
def grad_non_convex(point):
    """Analytic gradient of f(x, y) = sin(x) * cos(y) * x * y.

    By the product rule:
        df/dx = y * cos(y) * (x * cos(x) + sin(x))
        df/dy = x * sin(x) * (cos(y) - y * sin(y))

    Fix: the previous version dropped the product-rule x/y factors and mixed
    sin/cos terms, so descent on the non-convex surface followed a direction
    that did not match the plotted function.
    """
    x, y = point
    dfdx = y * np.cos(y) * (x * np.cos(x) + np.sin(x))
    dfdy = x * np.sin(x) * (np.cos(y) - y * np.sin(y))
    return np.array([dfdx, dfdy])
def simulated_annealing(func, start, temp, cooling_rate, n_iter):
    """Minimize ``func`` by simulated annealing with a geometric cooling schedule.

    Proposes Gaussian (sigma = 1) jumps; downhill moves are always accepted,
    uphill moves with probability exp(-delta_E / temp). Temperature is
    multiplied by ``cooling_rate`` after every iteration.

    Returns (path, lowest_point): the (n_iter + 1, 2) array of current points
    per iteration (start included) and the lowest accepted point observed.
    """
    current = start
    best = current
    history = [start]
    for _ in range(n_iter):
        candidate = current + np.random.normal(0, 1, 2)
        delta_e = func(candidate[0], candidate[1]) - func(current[0], current[1])
        # Metropolis criterion; the short-circuit means the uniform draw only
        # happens for uphill proposals (preserves the RNG stream).
        if delta_e < 0 or np.exp(-delta_e / temp) > np.random.rand():
            current = candidate
            if func(current[0], current[1]) < func(best[0], best[1]):
                best = current
        history.append(current)
        temp *= cooling_rate
    return np.array(history), best
def plot_3d_surface(func, path, title, alphas=None, lowest_point=None):
    """Build a plotly 3-D figure: the surface of ``func`` plus an optimizer path.

    path         -- (n, 2) array of visited (x, y) points.
    alphas       -- optional per-point opacities (defaults to fully opaque).
    lowest_point -- optional (x, y) to highlight in red as 'Lowest Observed'.
    Returns the assembled go.Figure.
    """
    # Expand plot bounds so the whole path stays in view (at least [-6, 6]).
    x_lo = min(path[:, 0].min(), -6)
    x_hi = max(path[:, 0].max(), 6)
    y_lo = min(path[:, 1].min(), -6)
    y_hi = max(path[:, 1].max(), 6)
    grid_x, grid_y = np.meshgrid(np.linspace(x_lo, x_hi, 200),
                                 np.linspace(y_lo, y_hi, 200))
    fig = go.Figure(data=[go.Surface(z=func(grid_x, grid_y),
                                     x=grid_x, y=grid_y, opacity=0.7)])
    if alphas is None:
        alphas = [1.0] * len(path)
    # One short line trace per step so each segment can fade independently.
    for idx in range(len(path) - 1):
        seg = path[idx:idx + 2]
        fig.add_trace(go.Scatter3d(
            x=seg[:, 0],
            y=seg[:, 1],
            z=func(seg[:, 0], seg[:, 1]),
            mode='lines',
            line=dict(color='orange', width=4),
            opacity=alphas[idx],
            showlegend=False
        ))
    # Orange markers for every visited point.
    fig.add_trace(go.Scatter3d(
        x=path[:, 0],
        y=path[:, 1],
        z=func(path[:, 0], path[:, 1]),
        mode='markers',
        marker=dict(size=4, color='orange', opacity=alphas[-1]),
        name='Path'
    ))
    # Green marker at the starting point.
    fig.add_trace(go.Scatter3d(
        x=[path[0, 0]],
        y=[path[0, 1]],
        z=[func(path[0, 0], path[0, 1])],
        mode='markers',
        marker=dict(size=6, color='green', opacity=alphas[0]),
        name='Start'
    ))
    if lowest_point is not None:
        # Red marker at the best point seen (used by simulated annealing).
        fig.add_trace(go.Scatter3d(
            x=[lowest_point[0]],
            y=[lowest_point[1]],
            z=[func(lowest_point[0], lowest_point[1])],
            mode='markers',
            marker=dict(size=6, color='red', opacity=alphas[-1]),
            name='Lowest Observed'
        ))
    fig.update_layout(title=title, scene=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z'))
    return fig
# --- Page layout and shared parameters -------------------------------------
st.title("Convex and Non-Convex SGD Optimization")
tab1, tab2, tab3 = st.tabs(["Gradient Descent", "Stochastic Gradient Descent", "Simulated Annealing"])
# Sidebar sliders are shared by all three tabs.
st.sidebar.header("Parameters")
learning_rate = st.sidebar.slider("Learning Rate", 0.01, 1.0, 0.1)
n_iter = st.sidebar.slider("Number of Iterations", 10, 100, 50)
# Separate start points for the convex and non-convex surfaces.
convex_start_x = st.sidebar.slider("Convex Start X", -3.0, 3.0, 2.5)
convex_start_y = st.sidebar.slider("Convex Start Y", -3.0, 3.0, 2.5)
non_convex_start_x = st.sidebar.slider("Non-Convex Start X", -3.0, 3.0, 2.5)
non_convex_start_y = st.sidebar.slider("Non-Convex Start Y", -3.0, 3.0, 2.5)
# Annealing-only knobs.
temp = st.sidebar.slider("Initial Temperature (Simulated Annealing)", 1.0, 10.0, 5.0)
cooling_rate = st.sidebar.slider("Cooling Rate (Simulated Annealing)", 0.8, 0.99, 0.95)
convex_start = np.array([convex_start_x, convex_start_y])
non_convex_start = np.array([non_convex_start_x, non_convex_start_y])
with tab1:
    st.header("Gradient Descent")
    st.write("Visualizing gradient descent on convex and non-convex functions.")
    # Collapsible reference: the update rule and notation behind the plots.
    with st.expander("Gradient Descent Algorithm and Math"):
        st.markdown(r"""
### Gradient Descent Algorithm
**Step-by-step Algorithm**:
1. Initialize starting point $\mathbf{x}_0$.
2. For each iteration $t$:
- Compute the gradient $\nabla f(\mathbf{x}_t)$.
- Update the current point: $\mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \nabla f(\mathbf{x}_t)$.
**Mathematical Formulation**:
$$
\mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \nabla f(\mathbf{x}_t)
$$
where:
- $\mathbf{x}_t$ is the current point.
- $\alpha$ is the learning rate.
- $\nabla f(\mathbf{x}_t)$ is the gradient of the function at $\mathbf{x}_t$.
""")
    # Run deterministic GD from the sidebar start points and plot both surfaces.
    convex_path_gd = gradient_descent(convex_function, grad_convex, convex_start, learning_rate, n_iter)
    non_convex_path_gd = gradient_descent(non_convex_function, grad_non_convex, non_convex_start, learning_rate, n_iter)
    st.plotly_chart(plot_3d_surface(convex_function, convex_path_gd, "Convex Function (GD)"))
    st.plotly_chart(plot_3d_surface(non_convex_function, non_convex_path_gd, "Non-Convex Function (GD)"))
with tab2:
    st.header("Stochastic Gradient Descent")
    st.write("Visualizing stochastic gradient descent on convex and non-convex functions.")
    # Collapsible reference: same as GD but with a noisy gradient estimate.
    with st.expander("Stochastic Gradient Descent Algorithm and Math"):
        st.markdown(r"""
### Stochastic Gradient Descent Algorithm
**Step-by-step Algorithm**:
1. Initialize starting point $\mathbf{x}_0$.
2. For each iteration $t$:
- Compute a stochastic approximation of the gradient $\nabla f(\mathbf{x}_t) + \text{noise}$.
- Update the current point: $\mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \left(\nabla f(\mathbf{x}_t) + \text{noise}\right)$.
**Mathematical Formulation**:
$$
\mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \left(\nabla f(\mathbf{x}_t) + \text{noise}\right)
$$
where:
- $\mathbf{x}_t$ is the current point.
- $\alpha$ is the learning rate.
- $\nabla f(\mathbf{x}_t)$ is the gradient of the function at $\mathbf{x}_t$.
- $\text{noise}$ is a small random perturbation.
""")
    # SGD paths re-randomize on every Streamlit rerun (no fixed seed).
    convex_path_sgd = stochastic_gradient_descent(convex_function, grad_convex, convex_start, learning_rate, n_iter)
    non_convex_path_sgd = stochastic_gradient_descent(non_convex_function, grad_non_convex, non_convex_start, learning_rate, n_iter)
    st.plotly_chart(plot_3d_surface(convex_function, convex_path_sgd, "Convex Function (SGD)"))
    st.plotly_chart(plot_3d_surface(non_convex_function, non_convex_path_sgd, "Non-Convex Function (SGD)"))
with tab3:
    st.header("Simulated Annealing")
    st.write("Visualizing simulated annealing on a non-convex function.")
    # Collapsible reference: Metropolis acceptance rule with cooling.
    with st.expander("Simulated Annealing Algorithm and Math"):
        st.markdown(r"""
### Simulated Annealing Algorithm
**Step-by-step Algorithm**:
1. Initialize starting point $\mathbf{x}_0$ and temperature $T$.
2. For each iteration $t$:
- Generate a new point $\mathbf{x}'$ in the neighborhood of the current point $\mathbf{x}_t$.
- Compute the change in function value $\Delta E = f(\mathbf{x}') - f(\mathbf{x}_t)$.
- If $\Delta E < 0$, accept the new point $\mathbf{x}_{t+1} = \mathbf{x}'$.
- If $\Delta E \geq 0$, accept the new point with a probability $\exp\left(\frac{-\Delta E}{T}\right)$.
- Update the temperature $T$.
**Mathematical Formulation**:
$$
\mathbf{x}_{t+1} =
\begin{cases}
\mathbf{x}' & \text{if } \Delta E < 0 \\
\mathbf{x}' & \text{with probability } \exp\left(\frac{-\Delta E}{T}\right) \text{ if } \Delta E \geq 0 \\
\mathbf{x}_t & \text{otherwise}
\end{cases}
$$
where:
- $\mathbf{x}_t$ is the current point.
- $\mathbf{x}'$ is the new point.
- $T$ is the temperature.
- $\Delta E = f(\mathbf{x}') - f(\mathbf{x}_t)$ is the change in function value.
- $\exp\left(\frac{-\Delta E}{T}\right)$ is the acceptance probability.
""")
    non_convex_path_sa, lowest_point = simulated_annealing(non_convex_function, non_convex_start, temp, cooling_rate, n_iter)
    # Visualizing the path with alpha changing based on iteration
    alphas = np.linspace(0.1, 1, len(non_convex_path_sa))
    fig_sa = plot_3d_surface(non_convex_function, non_convex_path_sa, "Non-Convex Function (SA)", alphas=alphas, lowest_point=lowest_point)
    # Adding blue points for other iteration's observed minimums
    other_mins = non_convex_path_sa[:-1]
    fig_sa.add_trace(go.Scatter3d(
        x=other_mins[:, 0],
        y=other_mins[:, 1],
        z=non_convex_function(other_mins[:, 0], other_mins[:, 1]),
        mode='markers',
        marker=dict(size=4, color='blue'),
        name='Observed Minima'
    ))
    # Adding the final minimum point in red
    # NOTE(review): plot_3d_surface already draws a red 'Lowest Observed' trace
    # when lowest_point is passed, so this appears to duplicate that legend
    # entry — confirm and consider removing one of the two.
    fig_sa.add_trace(go.Scatter3d(
        x=[lowest_point[0]],
        y=[lowest_point[1]],
        z=[non_convex_function(lowest_point[0], lowest_point[1])],
        mode='markers',
        marker=dict(size=6, color='red'),
        name='Lowest Observed'
    ))
    # Adding the starting point in green
    # NOTE(review): 'Start' is likewise already drawn inside plot_3d_surface.
    fig_sa.add_trace(go.Scatter3d(
        x=[non_convex_path_sa[0, 0]],
        y=[non_convex_path_sa[0, 1]],
        z=[non_convex_function(non_convex_path_sa[0, 0], non_convex_path_sa[0, 1])],
        mode='markers',
        marker=dict(size=6, color='green'),
        name='Start'
    ))
    st.plotly_chart(fig_sa)