suvradeepp committed on
Commit
1812ff9
·
verified ·
1 Parent(s): 1ce902d

Create gd_sgd_app.py

Browse files
Files changed (1) hide show
  1. gd_sgd_app.py +254 -0
gd_sgd_app.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import plotly.graph_objs as go
4
+
5
def convex_function(x, y):
    """Evaluate the convex bowl ``f(x, y) = x**2 + y**2``.

    Uses only ``**`` and ``+``, so it accepts scalars as well as NumPy
    arrays (evaluated elementwise).
    """
    x_squared = x ** 2
    y_squared = y ** 2
    return x_squared + y_squared
7
+
8
def non_convex_function(x, y):
    """Evaluate the oscillating surface ``f(x, y) = sin(x) * cos(y) * x * y``.

    Built from NumPy ufuncs, so scalars and arrays (elementwise) both work.
    """
    # Keep the original left-to-right multiplication order so results match
    # bit-for-bit.
    trig_part = np.sin(x) * np.cos(y)
    return trig_part * x * y
10
+
11
def gradient_descent(func, grad_func, start, learning_rate, n_iter):
    """Run fixed-step gradient descent and record every iterate.

    Args:
        func: Objective being minimised. It is not read by the update rule;
            only ``grad_func`` is evaluated.
        grad_func: Callable taking the current point and returning its
            gradient.
        start: Initial point; becomes the first row of the result.
        learning_rate: Step size multiplying the gradient.
        n_iter: Number of update steps to take.

    Returns:
        ``np.array`` of the visited points, ``start`` first, with
        ``n_iter + 1`` entries in total.
    """
    trajectory = [start]
    position = start
    for _step in range(n_iter):
        step = learning_rate * grad_func(position)
        position = position - step
        trajectory.append(position)
    return np.array(trajectory)
18
+
19
def stochastic_gradient_descent(func, grad_func, start, learning_rate, n_iter,
                                noise_scale=0.1):
    """Run gradient descent with Gaussian noise added to every gradient.

    Each step evaluates ``grad_func`` at the current point, adds zero-mean
    normal noise drawn from NumPy's global random state, and moves against
    the noisy gradient.

    Args:
        func: Objective being minimised. It is not read by the update rule;
            only ``grad_func`` is evaluated.
        grad_func: Callable taking the current point and returning its
            gradient.
        start: Initial point; becomes the first row of the result.
        learning_rate: Step size multiplying the noisy gradient.
        n_iter: Number of update steps to take.
        noise_scale: Standard deviation of the gradient noise. Defaults to
            0.1, the value that used to be hard-coded.

    Returns:
        ``np.array`` of the visited points, ``start`` first, with
        ``n_iter + 1`` entries in total.
    """
    path = [start]
    for _ in range(n_iter):
        current = path[-1]
        # The noise takes the shape of the point instead of a fixed length of
        # 2, so points of any dimension work. For 2-D points the random draws
        # are the same as before.
        noise = np.random.normal(0, noise_scale, np.shape(current))
        grad = grad_func(current) + noise
        path.append(current - learning_rate * grad)
    return np.array(path)
26
+
27
def grad_convex(point):
    """Return the gradient of ``x**2 + y**2`` at ``point``.

    Args:
        point: Two-element sequence ``(x, y)``.

    Returns:
        ``np.array([2*x, 2*y])``.
    """
    px, py = point
    d_dx = 2 * px
    d_dy = 2 * py
    return np.array([d_dx, d_dy])
30
+
31
def grad_non_convex(point):
    """Return the gradient of ``sin(x) * cos(y) * x * y`` at ``point``.

    Writing the function as ``(x * sin(x)) * (y * cos(y))`` and applying the
    product rule to each factor gives::

        df/dx = (sin(x) + x * cos(x)) * y * cos(y)
        df/dy = x * sin(x) * (cos(y) - y * sin(y))

    The previous expression mixed up these terms and did not match the
    function's actual slope.

    Args:
        point: Two-element sequence ``(x, y)``.

    Returns:
        ``np.array([df/dx, df/dy])``.
    """
    x, y = point
    sin_x, cos_x = np.sin(x), np.cos(x)
    sin_y, cos_y = np.sin(y), np.cos(y)
    df_dx = (sin_x + x * cos_x) * y * cos_y
    df_dy = x * sin_x * (cos_y - y * sin_y)
    return np.array([df_dx, df_dy])
34
+
35
def simulated_annealing(func, start, temp, cooling_rate, n_iter):
    """Minimise ``func`` with simulated annealing and record the walk.

    Every iteration proposes ``current + N(0, 1)`` noise in two dimensions.
    A proposal that lowers the function value is always accepted; otherwise
    it is accepted when ``exp(-delta_E / temp)`` exceeds a uniform random
    draw. The temperature is multiplied by ``cooling_rate`` after every
    iteration. Random numbers come from NumPy's global random state.

    Args:
        func: Objective called as ``func(x, y)``.
        start: Initial two-element point; becomes the first row of the path.
        temp: Initial temperature.
        cooling_rate: Factor applied to the temperature after each iteration.
        n_iter: Number of proposals to make.

    Returns:
        Tuple ``(path, lowest_point)``: ``path`` is an ``np.array`` holding the
        current point after each iteration (``start`` first, ``n_iter + 1``
        entries), and ``lowest_point`` is the accepted point with the lowest
        function value seen.
    """
    path = [start]
    current_point = start
    # Cache objective values so each point is evaluated once rather than
    # re-evaluated in every comparison.
    current_value = func(current_point[0], current_point[1])
    lowest_point, lowest_value = current_point, current_value

    for _ in range(n_iter):
        next_point = current_point + np.random.normal(0, 1, 2)
        next_value = func(next_point[0], next_point[1])
        delta_e = next_value - current_value
        # Short-circuit on improvement: the uniform draw happens only for
        # non-improving proposals, keeping the random stream unchanged.
        if delta_e < 0 or np.exp(-delta_e / temp) > np.random.rand():
            current_point, current_value = next_point, next_value
            if current_value < lowest_value:
                lowest_point, lowest_value = current_point, current_value
        path.append(current_point)
        temp *= cooling_rate

    return np.array(path), lowest_point
49
+
50
def plot_3d_surface(func, path, title, alphas=None, lowest_point=None):
    """Build a Plotly figure of ``func`` with an optimisation path on top.

    Args:
        func: Objective called as ``func(x, y)``; it is applied to whole
            arrays for the surface and the path.
        path: 2-D array whose columns 0 and 1 hold the x and y coordinates of
            the visited points.
        title: Figure title.
        alphas: Optional per-point opacities. ``alphas[i]`` is used for the
            segment starting at point ``i``, ``alphas[0]`` for the start
            marker, and ``alphas[-1]`` for the path markers and the lowest
            point marker. Defaults to fully opaque.
        lowest_point: Optional ``(x, y)`` drawn as a red "Lowest Observed"
            marker.

    Returns:
        The assembled ``go.Figure``.
    """
    xs, ys = path[:, 0], path[:, 1]

    # Show at least [-6, 6] on both axes, widening to fit the path.
    x_lo, x_hi = min(xs.min(), -6), max(xs.max(), 6)
    y_lo, y_hi = min(ys.min(), -6), max(ys.max(), 6)

    grid_x, grid_y = np.meshgrid(
        np.linspace(x_lo, x_hi, 200),
        np.linspace(y_lo, y_hi, 200),
    )
    surface = go.Surface(z=func(grid_x, grid_y), x=grid_x, y=grid_y, opacity=0.7)
    fig = go.Figure(data=[surface])

    if alphas is None:
        alphas = [1.0] * len(path)

    def big_marker(px, py, colour, alpha, label):
        # Single size-6 marker placed on the surface at (px, py).
        return go.Scatter3d(
            x=[px],
            y=[py],
            z=[func(px, py)],
            mode='markers',
            marker=dict(size=6, color=colour, opacity=alpha),
            name=label
        )

    # One trace per segment so every segment can carry its own opacity.
    for idx in range(len(path) - 1):
        pair = slice(idx, idx + 2)
        fig.add_trace(go.Scatter3d(
            x=xs[pair],
            y=ys[pair],
            z=func(xs[pair], ys[pair]),
            mode='lines',
            line=dict(color='orange', width=4),
            opacity=alphas[idx],
            showlegend=False
        ))

    # All visited points as small orange markers.
    fig.add_trace(go.Scatter3d(
        x=xs,
        y=ys,
        z=func(xs, ys),
        mode='markers',
        marker=dict(size=4, color='orange', opacity=alphas[-1]),
        name='Path'
    ))

    fig.add_trace(big_marker(path[0, 0], path[0, 1], 'green', alphas[0], 'Start'))

    if lowest_point is not None:
        fig.add_trace(big_marker(
            lowest_point[0], lowest_point[1], 'red', alphas[-1], 'Lowest Observed'
        ))

    axis_titles = dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z')
    fig.update_layout(title=title, scene=axis_titles)
    return fig
105
+
106
# Page title shown above the tabs.
st.title("Convex and Non-Convex SGD Optimization")


# One tab per optimiser; each is populated by a `with tabN:` section below.
tab1, tab2, tab3 = st.tabs(["Gradient Descent", "Stochastic Gradient Descent", "Simulated Annealing"])

st.sidebar.header("Parameters")

# Sidebar controls. Slider arguments are (label, min, max, default).
# The learning rate and iteration count are shared by all three optimisers'
# runs; the iteration count is also the number of annealing proposals.
learning_rate = st.sidebar.slider("Learning Rate", 0.01, 1.0, 0.1)
n_iter = st.sidebar.slider("Number of Iterations", 10, 100, 50)
# Separate starting coordinates for the convex and non-convex objectives.
convex_start_x = st.sidebar.slider("Convex Start X", -3.0, 3.0, 2.5)
convex_start_y = st.sidebar.slider("Convex Start Y", -3.0, 3.0, 2.5)
non_convex_start_x = st.sidebar.slider("Non-Convex Start X", -3.0, 3.0, 2.5)
non_convex_start_y = st.sidebar.slider("Non-Convex Start Y", -3.0, 3.0, 2.5)
# These two are only passed to simulated_annealing.
temp = st.sidebar.slider("Initial Temperature (Simulated Annealing)", 1.0, 10.0, 5.0)
cooling_rate = st.sidebar.slider("Cooling Rate (Simulated Annealing)", 0.8, 0.99, 0.95)

# Pack the slider values into (x, y) arrays, the form the optimisers take as `start`.
convex_start = np.array([convex_start_x, convex_start_y])
non_convex_start = np.array([non_convex_start_x, non_convex_start_y])
124
+
125
# Tab 1: plain gradient descent on both objectives.
with tab1:
    st.header("Gradient Descent")
    st.write("Visualizing gradient descent on convex and non-convex functions.")

    # Collapsible explainer. The raw string keeps the LaTeX backslashes intact.
    with st.expander("Gradient Descent Algorithm and Math"):
        st.markdown(r"""
        ### Gradient Descent Algorithm
        **Step-by-step Algorithm**:
        1. Initialize starting point $\mathbf{x}_0$.
        2. For each iteration $t$:
        - Compute the gradient $\nabla f(\mathbf{x}_t)$.
        - Update the current point: $\mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \nabla f(\mathbf{x}_t)$.

        **Mathematical Formulation**:
        $$
        \mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \nabla f(\mathbf{x}_t)
        $$
        where:
        - $\mathbf{x}_t$ is the current point.
        - $\alpha$ is the learning rate.
        - $\nabla f(\mathbf{x}_t)$ is the gradient of the function at $\mathbf{x}_t$.
        """)

    # Run GD on each objective from its own sidebar-selected start, using the
    # shared learning rate and iteration count.
    convex_path_gd = gradient_descent(convex_function, grad_convex, convex_start, learning_rate, n_iter)
    non_convex_path_gd = gradient_descent(non_convex_function, grad_non_convex, non_convex_start, learning_rate, n_iter)

    # One 3-D chart per objective, convex first.
    st.plotly_chart(plot_3d_surface(convex_function, convex_path_gd, "Convex Function (GD)"))
    st.plotly_chart(plot_3d_surface(non_convex_function, non_convex_path_gd, "Non-Convex Function (GD)"))
153
+
154
# Tab 2: gradient descent with noisy gradients on both objectives.
with tab2:
    st.header("Stochastic Gradient Descent")
    st.write("Visualizing stochastic gradient descent on convex and non-convex functions.")

    # Collapsible explainer. The raw string keeps the LaTeX backslashes intact.
    with st.expander("Stochastic Gradient Descent Algorithm and Math"):
        st.markdown(r"""
        ### Stochastic Gradient Descent Algorithm
        **Step-by-step Algorithm**:
        1. Initialize starting point $\mathbf{x}_0$.
        2. For each iteration $t$:
        - Compute a stochastic approximation of the gradient $\nabla f(\mathbf{x}_t) + \text{noise}$.
        - Update the current point: $\mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \left(\nabla f(\mathbf{x}_t) + \text{noise}\right)$.

        **Mathematical Formulation**:
        $$
        \mathbf{x}_{t+1} = \mathbf{x}_t - \alpha \left(\nabla f(\mathbf{x}_t) + \text{noise}\right)
        $$
        where:
        - $\mathbf{x}_t$ is the current point.
        - $\alpha$ is the learning rate.
        - $\nabla f(\mathbf{x}_t)$ is the gradient of the function at $\mathbf{x}_t$.
        - $\text{noise}$ is a small random perturbation.
        """)

    # Same starts and hyper-parameters as the GD tab. The noise is drawn from
    # NumPy's global random state, which this script never seeds.
    convex_path_sgd = stochastic_gradient_descent(convex_function, grad_convex, convex_start, learning_rate, n_iter)
    non_convex_path_sgd = stochastic_gradient_descent(non_convex_function, grad_non_convex, non_convex_start, learning_rate, n_iter)

    # One 3-D chart per objective, convex first.
    st.plotly_chart(plot_3d_surface(convex_function, convex_path_sgd, "Convex Function (SGD)"))
    st.plotly_chart(plot_3d_surface(non_convex_function, non_convex_path_sgd, "Non-Convex Function (SGD)"))
183
+
184
# Tab 3: simulated annealing on the non-convex objective only.
with tab3:
    st.header("Simulated Annealing")
    st.write("Visualizing simulated annealing on a non-convex function.")

    # Collapsible explainer. The raw string keeps the LaTeX backslashes intact.
    with st.expander("Simulated Annealing Algorithm and Math"):
        st.markdown(r"""
        ### Simulated Annealing Algorithm
        **Step-by-step Algorithm**:
        1. Initialize starting point $\mathbf{x}_0$ and temperature $T$.
        2. For each iteration $t$:
        - Generate a new point $\mathbf{x}'$ in the neighborhood of the current point $\mathbf{x}_t$.
        - Compute the change in function value $\Delta E = f(\mathbf{x}') - f(\mathbf{x}_t)$.
        - If $\Delta E < 0$, accept the new point $\mathbf{x}_{t+1} = \mathbf{x}'$.
        - If $\Delta E \geq 0$, accept the new point with a probability $\exp\left(\frac{-\Delta E}{T}\right)$.
        - Update the temperature $T$.

        **Mathematical Formulation**:
        $$
        \mathbf{x}_{t+1} =
        \begin{cases}
        \mathbf{x}' & \text{if } \Delta E < 0 \\
        \mathbf{x}' & \text{with probability } \exp\left(\frac{-\Delta E}{T}\right) \text{ if } \Delta E \geq 0 \\
        \mathbf{x}_t & \text{otherwise}
        \end{cases}
        $$
        where:
        - $\mathbf{x}_t$ is the current point.
        - $\mathbf{x}'$ is the new point.
        - $T$ is the temperature.
        - $\Delta E = f(\mathbf{x}') - f(\mathbf{x}_t)$ is the change in function value.
        - $\exp\left(\frac{-\Delta E}{T}\right)$ is the acceptance probability.
        """)

    non_convex_path_sa, lowest_point = simulated_annealing(non_convex_function, non_convex_start, temp, cooling_rate, n_iter)

    # Fade the path in: early segments are faint, late ones opaque.
    alphas = np.linspace(0.1, 1, len(non_convex_path_sa))
    # lowest_point is not passed here. The red marker is added explicitly
    # below, after the blue points, so it is drawn once and appears once in
    # the legend.
    fig_sa = plot_3d_surface(non_convex_function, non_convex_path_sa, "Non-Convex Function (SA)", alphas=alphas)

    # Blue markers on every visited point except the final one.
    other_mins = non_convex_path_sa[:-1]
    fig_sa.add_trace(go.Scatter3d(
        x=other_mins[:, 0],
        y=other_mins[:, 1],
        z=non_convex_function(other_mins[:, 0], other_mins[:, 1]),
        mode='markers',
        marker=dict(size=4, color='blue'),
        name='Observed Minima'
    ))

    # The lowest point found, in red.
    fig_sa.add_trace(go.Scatter3d(
        x=[lowest_point[0]],
        y=[lowest_point[1]],
        z=[non_convex_function(lowest_point[0], lowest_point[1])],
        mode='markers',
        marker=dict(size=6, color='red'),
        name='Lowest Observed'
    ))

    # Opaque green start marker. plot_3d_surface draws its own start marker at
    # opacity alphas[0] (0.1 here), which is barely visible.
    fig_sa.add_trace(go.Scatter3d(
        x=[non_convex_path_sa[0, 0]],
        y=[non_convex_path_sa[0, 1]],
        z=[non_convex_function(non_convex_path_sa[0, 0], non_convex_path_sa[0, 1])],
        mode='markers',
        marker=dict(size=6, color='green'),
        name='Start'
    ))

    st.plotly_chart(fig_sa)