Spaces:
Sleeping
Sleeping
Create gd_sgd_app.py
Browse files- gd_sgd_app.py +254 -0
gd_sgd_app.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import numpy as np
|
| 3 |
+
import plotly.graph_objs as go
|
| 4 |
+
|
| 5 |
+
def convex_function(x, y):
    """Paraboloid f(x, y) = x**2 + y**2 — the canonical convex test surface.

    Works element-wise on scalars or numpy arrays (used on meshgrids too).
    """
    return x * x + y * y
|
| 7 |
+
|
| 8 |
+
def non_convex_function(x, y):
    """Oscillatory surface f(x, y) = sin(x)*cos(y)*x*y with many local minima.

    Works element-wise on scalars or numpy arrays.
    """
    # Keep the original left-to-right multiplication order so float results
    # are bit-identical: ((sin(x)*cos(y)) * x) * y.
    wave = np.sin(x) * np.cos(y)
    return wave * x * y
|
| 10 |
+
|
| 11 |
+
def gradient_descent(func, grad_func, start, learning_rate, n_iter):
    """Run vanilla gradient descent and return every visited point.

    Args:
        func: objective (unused here; kept for signature symmetry with the
            other optimisers in this file).
        grad_func: callable mapping a point (2-vector) to its gradient.
        start: initial point as a numpy array.
        learning_rate: step size alpha.
        n_iter: number of update steps.

    Returns:
        numpy array of shape (n_iter + 1, 2): start plus one row per step.
    """
    current = start
    trajectory = [current]
    for _ in range(n_iter):
        current = current - learning_rate * grad_func(current)
        trajectory.append(current)
    return np.array(trajectory)
|
| 18 |
+
|
| 19 |
+
def stochastic_gradient_descent(func, grad_func, start, learning_rate, n_iter):
    """Gradient descent with Gaussian noise added to each gradient evaluation.

    The noise (np.random.normal(0, 0.1, 2)) mimics the stochasticity of
    mini-batch gradients. One normal draw is made per iteration, in the same
    order as the original implementation, so seeded runs are reproducible.

    Args:
        func: objective (unused; kept for signature symmetry).
        grad_func: callable mapping a point (2-vector) to its gradient.
        start: initial point as a numpy array.
        learning_rate: step size alpha.
        n_iter: number of update steps.

    Returns:
        numpy array of shape (n_iter + 1, 2): start plus one row per step.
    """
    current = start
    trajectory = [current]
    for _ in range(n_iter):
        noisy_grad = grad_func(current) + np.random.normal(0, 0.1, 2)
        current = current - learning_rate * noisy_grad
        trajectory.append(current)
    return np.array(trajectory)
|
| 26 |
+
|
| 27 |
+
def grad_convex(point):
    """Analytic gradient of convex_function: grad f = (2x, 2y)."""
    x, y = point
    # x + x == 2 * x but preserves the input's numeric type exactly.
    return np.array([x + x, y + y])
|
| 30 |
+
|
| 31 |
+
def grad_non_convex(point):
    """Analytic gradient of non_convex_function f(x, y) = sin(x)*cos(y)*x*y.

    By the product rule:
        df/dx = y*cos(y) * (sin(x) + x*cos(x))
        df/dy = x*sin(x) * (cos(y) - y*sin(y))

    The previous implementation swapped sin/cos factors in both components
    and returned an incorrect gradient, which made the GD/SGD paths on the
    non-convex surface descend in wrong directions.

    Args:
        point: 2-vector (x, y).

    Returns:
        numpy array [df/dx, df/dy] evaluated at point.
    """
    x, y = point
    dfdx = y * np.cos(y) * (np.sin(x) + x * np.cos(x))
    dfdy = x * np.sin(x) * (np.cos(y) - y * np.sin(y))
    return np.array([dfdx, dfdy])
|
| 34 |
+
|
| 35 |
+
def simulated_annealing(func, start, temp, cooling_rate, n_iter):
    """Minimise func(x, y) by simulated annealing.

    Each step proposes a Gaussian perturbation of the current point and
    accepts it if it lowers the objective, or otherwise with Metropolis
    probability exp(-delta/temp). The temperature is multiplied by
    cooling_rate after every step.

    Args:
        func: objective taking (x, y) scalars.
        start: initial point as a numpy 2-vector.
        temp: initial temperature.
        cooling_rate: multiplicative decay applied to temp each step.
        n_iter: number of proposal steps.

    Returns:
        (path, lowest_point): path is an (n_iter + 1, 2) array of the point
        held after each step (repeats on rejection); lowest_point is the
        best accepted point observed.
    """
    current = start
    best = current
    visited = [current]
    for _ in range(n_iter):
        candidate = current + np.random.normal(0, 1, 2)
        delta = func(candidate[0], candidate[1]) - func(current[0], current[1])
        # NB: keep the short-circuit — np.random.rand() is drawn only for
        # uphill moves, preserving the original's random stream exactly.
        if delta < 0 or np.exp(-delta / temp) > np.random.rand():
            current = candidate
            if func(current[0], current[1]) < func(best[0], best[1]):
                best = current
        visited.append(current)
        temp *= cooling_rate
    return np.array(visited), best
|
| 49 |
+
|
| 50 |
+
def plot_3d_surface(func, path, title, alphas=None, lowest_point=None):
    """Build a plotly 3-D surface of func with the optimisation path overlaid.

    Args:
        func: objective taking (x, y); must broadcast over numpy arrays.
        path: (n, 2) array of visited points.
        title: figure title.
        alphas: optional per-point opacities (defaults to fully opaque);
            segment i uses alphas[i], markers use alphas[-1].
        lowest_point: optional (x, y) highlighted as 'Lowest Observed'.

    Returns:
        plotly.graph_objs.Figure.
    """
    xs, ys = path[:, 0], path[:, 1]
    # Axis window covers at least [-6, 6] but grows to contain the path.
    x_lo, x_hi = min(xs.min(), -6), max(xs.max(), 6)
    y_lo, y_hi = min(ys.min(), -6), max(ys.max(), 6)

    grid_x, grid_y = np.meshgrid(np.linspace(x_lo, x_hi, 200),
                                 np.linspace(y_lo, y_hi, 200))
    grid_z = func(grid_x, grid_y)

    fig = go.Figure(data=[go.Surface(z=grid_z, x=grid_x, y=grid_y, opacity=0.7)])

    if alphas is None:
        alphas = [1.0] * len(path)

    # One short line segment per step so each can carry its own opacity.
    for i in range(len(path) - 1):
        seg = path[i:i + 2]
        fig.add_trace(go.Scatter3d(
            x=seg[:, 0],
            y=seg[:, 1],
            z=func(seg[:, 0], seg[:, 1]),
            mode='lines',
            line=dict(color='orange', width=4),
            opacity=alphas[i],
            showlegend=False
        ))

    # All visited points as markers.
    fig.add_trace(go.Scatter3d(
        x=xs,
        y=ys,
        z=func(xs, ys),
        mode='markers',
        marker=dict(size=4, color='orange', opacity=alphas[-1]),
        name='Path'
    ))

    # Starting point highlighted in green.
    fig.add_trace(go.Scatter3d(
        x=[path[0, 0]],
        y=[path[0, 1]],
        z=[func(path[0, 0], path[0, 1])],
        mode='markers',
        marker=dict(size=6, color='green', opacity=alphas[0]),
        name='Start'
    ))

    if lowest_point is not None:
        fig.add_trace(go.Scatter3d(
            x=[lowest_point[0]],
            y=[lowest_point[1]],
            z=[func(lowest_point[0], lowest_point[1])],
            mode='markers',
            marker=dict(size=6, color='red', opacity=alphas[-1]),
            name='Lowest Observed'
        ))

    fig.update_layout(title=title, scene=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z'))
    return fig
|
| 105 |
+
|
| 106 |
+
# ---- Page scaffold and shared controls -----------------------------------
st.title("Convex and Non-Convex SGD Optimization")


# One tab per optimiser demo.
tab1, tab2, tab3 = st.tabs(["Gradient Descent", "Stochastic Gradient Descent", "Simulated Annealing"])

st.sidebar.header("Parameters")

# Sidebar sliders: (label, min, max, default). learning_rate and n_iter are
# shared by all three tabs; temp/cooling_rate apply only to simulated
# annealing.
learning_rate = st.sidebar.slider("Learning Rate", 0.01, 1.0, 0.1)
n_iter = st.sidebar.slider("Number of Iterations", 10, 100, 50)
convex_start_x = st.sidebar.slider("Convex Start X", -3.0, 3.0, 2.5)
convex_start_y = st.sidebar.slider("Convex Start Y", -3.0, 3.0, 2.5)
non_convex_start_x = st.sidebar.slider("Non-Convex Start X", -3.0, 3.0, 2.5)
non_convex_start_y = st.sidebar.slider("Non-Convex Start Y", -3.0, 3.0, 2.5)
temp = st.sidebar.slider("Initial Temperature (Simulated Annealing)", 1.0, 10.0, 5.0)
cooling_rate = st.sidebar.slider("Cooling Rate (Simulated Annealing)", 0.8, 0.99, 0.95)

# Starting points packed as (x, y) vectors for the optimisers.
convex_start = np.array([convex_start_x, convex_start_y])
non_convex_start = np.array([non_convex_start_x, non_convex_start_y])
|
| 124 |
+
|
| 125 |
+
# ---- Tab 1: plain gradient descent ---------------------------------------
with tab1:
    st.header("Gradient Descent")
    st.write("Visualizing gradient descent on convex and non-convex functions.")

    # Collapsible math reference for the update rule.
    with st.expander("Gradient Descent Algorithm and Math"):
        st.markdown(r"""
        ### Gradient Descent Algorithm
        **Step-by-step Algorithm**:
        1. Initialize starting point $\mathbf{x}_0$.
        2. For each iteration $t$:
            - Compute the gradient $\nabla f(\mathbf{x}_t)$.
            - Update the current point: $\mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \nabla f(\mathbf{x}_t)$.

        **Mathematical Formulation**:
        $$
        \mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \nabla f(\mathbf{x}_t)
        $$
        where:
        - $\mathbf{x}_t$ is the current point.
        - $\alpha$ is the learning rate.
        - $\nabla f(\mathbf{x}_t)$ is the gradient of the function at $\mathbf{x}_t$.
        """)

    # Deterministic descent paths from the sidebar-selected start points.
    convex_path_gd = gradient_descent(convex_function, grad_convex, convex_start, learning_rate, n_iter)
    non_convex_path_gd = gradient_descent(non_convex_function, grad_non_convex, non_convex_start, learning_rate, n_iter)

    st.plotly_chart(plot_3d_surface(convex_function, convex_path_gd, "Convex Function (GD)"))
    st.plotly_chart(plot_3d_surface(non_convex_function, non_convex_path_gd, "Non-Convex Function (GD)"))
|
| 153 |
+
|
| 154 |
+
# ---- Tab 2: stochastic gradient descent ----------------------------------
with tab2:
    st.header("Stochastic Gradient Descent")
    st.write("Visualizing stochastic gradient descent on convex and non-convex functions.")

    # Collapsible math reference for the noisy update rule.
    with st.expander("Stochastic Gradient Descent Algorithm and Math"):
        st.markdown(r"""
        ### Stochastic Gradient Descent Algorithm
        **Step-by-step Algorithm**:
        1. Initialize starting point $\mathbf{x}_0$.
        2. For each iteration $t$:
            - Compute a stochastic approximation of the gradient $\nabla f(\mathbf{x}_t) + \text{noise}$.
            - Update the current point: $\mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \left(\nabla f(\mathbf{x}_t) + \text{noise}\right)$.

        **Mathematical Formulation**:
        $$
        \mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \left(\nabla f(\mathbf{x}_t) + \text{noise}\right)
        $$
        where:
        - $\mathbf{x}_t$ is the current point.
        - $\alpha$ is the learning rate.
        - $\nabla f(\mathbf{x}_t)$ is the gradient of the function at $\mathbf{x}_t$.
        - $\text{noise}$ is a small random perturbation.
        """)

    # Noisy descent paths; these re-randomise on every Streamlit rerun
    # because the global numpy RNG is not seeded.
    convex_path_sgd = stochastic_gradient_descent(convex_function, grad_convex, convex_start, learning_rate, n_iter)
    non_convex_path_sgd = stochastic_gradient_descent(non_convex_function, grad_non_convex, non_convex_start, learning_rate, n_iter)

    st.plotly_chart(plot_3d_surface(convex_function, convex_path_sgd, "Convex Function (SGD)"))
    st.plotly_chart(plot_3d_surface(non_convex_function, non_convex_path_sgd, "Non-Convex Function (SGD)"))
|
| 183 |
+
|
| 184 |
+
# ---- Tab 3: simulated annealing ------------------------------------------
with tab3:
    st.header("Simulated Annealing")
    st.write("Visualizing simulated annealing on a non-convex function.")

    # Collapsible math reference for the Metropolis acceptance rule.
    with st.expander("Simulated Annealing Algorithm and Math"):
        st.markdown(r"""
        ### Simulated Annealing Algorithm
        **Step-by-step Algorithm**:
        1. Initialize starting point $\mathbf{x}_0$ and temperature $T$.
        2. For each iteration $t$:
            - Generate a new point $\mathbf{x}'$ in the neighborhood of the current point $\mathbf{x}_t$.
            - Compute the change in function value $\Delta E = f(\mathbf{x}') - f(\mathbf{x}_t)$.
            - If $\Delta E < 0$, accept the new point $\mathbf{x}_{t+1} = \mathbf{x}'$.
            - If $\Delta E \geq 0$, accept the new point with a probability $\exp\left(\frac{-\Delta E}{T}\right)$.
            - Update the temperature $T$.

        **Mathematical Formulation**:
        $$
        \mathbf{x}_{t+1} =
        \begin{cases}
        \mathbf{x}' & \text{if } \Delta E < 0 \\
        \mathbf{x}' & \text{with probability } \exp\left(\frac{-\Delta E}{T}\right) \text{ if } \Delta E \geq 0 \\
        \mathbf{x}_t & \text{otherwise}
        \end{cases}
        $$
        where:
        - $\mathbf{x}_t$ is the current point.
        - $\mathbf{x}'$ is the new point.
        - $T$ is the temperature.
        - $\Delta E = f(\mathbf{x}') - f(\mathbf{x}_t)$ is the change in function value.
        - $\exp\left(\frac{-\Delta E}{T}\right)$ is the acceptance probability.
        """)

    non_convex_path_sa, lowest_point = simulated_annealing(non_convex_function, non_convex_start, temp, cooling_rate, n_iter)

    # Fade the path from transparent (early iterations) to opaque (late).
    alphas = np.linspace(0.1, 1, len(non_convex_path_sa))
    fig_sa = plot_3d_surface(non_convex_function, non_convex_path_sa, "Non-Convex Function (SA)", alphas=alphas, lowest_point=lowest_point)

    # Blue markers for every intermediate point held during the run.
    # NOTE(review): despite the trace name, these are all visited points
    # (path[:-1]), not per-iteration minima — confirm the intended label.
    other_mins = non_convex_path_sa[:-1]
    fig_sa.add_trace(go.Scatter3d(
        x=other_mins[:, 0],
        y=other_mins[:, 1],
        z=non_convex_function(other_mins[:, 0], other_mins[:, 1]),
        mode='markers',
        marker=dict(size=4, color='blue'),
        name='Observed Minima'
    ))

    # Best accepted point in red.
    # NOTE(review): plot_3d_surface already adds 'Lowest Observed' and
    # 'Start' traces when lowest_point is given, so the two traces below
    # duplicate legend entries — consider removing one copy.
    fig_sa.add_trace(go.Scatter3d(
        x=[lowest_point[0]],
        y=[lowest_point[1]],
        z=[non_convex_function(lowest_point[0], lowest_point[1])],
        mode='markers',
        marker=dict(size=6, color='red'),
        name='Lowest Observed'
    ))

    # Starting point in green.
    fig_sa.add_trace(go.Scatter3d(
        x=[non_convex_path_sa[0, 0]],
        y=[non_convex_path_sa[0, 1]],
        z=[non_convex_function(non_convex_path_sa[0, 0], non_convex_path_sa[0, 1])],
        mode='markers',
        marker=dict(size=6, color='green'),
        name='Start'
    ))

    st.plotly_chart(fig_sa)
|