File size: 9,627 Bytes
3b146d8
22a4fb6
3b146d8
 
 
 
 
 
 
 
 
 
 
 
 
 
2751a05
fad105d
 
3b146d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2751a05
3b146d8
 
 
 
 
91e87a3
 
 
3b146d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270d363
 
 
3b146d8
270d363
 
3b146d8
f99e40e
3b146d8
f99e40e
3b146d8
2751a05
270d363
2751a05
270d363
 
3b146d8
 
 
f99e40e
3b146d8
 
 
 
2751a05
3b146d8
 
 
84de0f2
3b146d8
 
84de0f2
3b146d8
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
### CSCI 4750/5750: regression models
### SLU-CS: Jie Hou

import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression

def cal_mse(X, y, b, w):
  """Return the mean squared error of the line y = b + w*x over (X, y).

  X is an (n, 1) feature column, y an (n, 1) target column; b and w are
  the intercept and slope being evaluated.
  """
  params = np.array([[b], [w]])
  # Prepend a bias column of ones so the line is a single matrix product.
  design = np.hstack((np.ones((len(X), 1)), X))
  residuals = design.dot(params) - y
  return np.mean(residuals ** 2)

def _run_batch_gd(X, y, b, w, lr, iteration):
  """Batch GD: one parameter update per full pass over the dataset.

  Returns (b_history, w_history, train_mse); histories include the
  starting (b, w).
  """
  b_history = [b]
  w_history = [w]
  train_mse = []
  for _ in range(iteration):
      b_grad = 0.0
      w_grad = 0.0
      # Accumulate the gradient of MSE over every sample.
      for n in range(len(X)):
          b_grad = b_grad - 2 * (y[n, 0] - b - w * X[n, 0]) * 1.0
          w_grad = w_grad - 2 * (y[n, 0] - b - w * X[n, 0]) * X[n, 0]
      b_grad /= len(X)
      w_grad /= len(X)
      b = b - lr * b_grad
      w = w - lr * w_grad
      b_history.append(b)
      w_history.append(w)
      train_mse.append(cal_mse(X, y, b, w))
  return b_history, w_history, train_mse


def _run_stochastic_gd(X, y, b, w, lr, iteration):
  """SGD: one update per randomly drawn sample, len(X) updates per iteration.

  Returns (b_history, w_history, train_mse); histories include the
  starting (b, w).
  """
  b_history = [b]
  w_history = [w]
  train_mse = []
  for _ in range(iteration):
      for _ in range(len(X)):
          idx = np.random.randint(len(X))
          b_grad = -2 * (y[idx, 0] - b - w * X[idx, 0]) * 1.0
          w_grad = -2 * (y[idx, 0] - b - w * X[idx, 0]) * X[idx, 0]
          b = b - lr * b_grad
          w = w - lr * w_grad
          b_history.append(b)
          w_history.append(w)
          train_mse.append(cal_mse(X, y, b, w))
  return b_history, w_history, train_mse


def _run_minibatch_gd(X, y, b, w, lr, iteration, batch_size):
  """Mini-batch GD: reshuffle every epoch, one update per batch_size slice.

  Returns (b_history, w_history, train_mse); histories include the
  starting (b, w).
  """
  b_history = [b]
  w_history = [w]
  train_mse = []
  for _ in range(iteration):
      shuffled_indices = np.random.permutation(len(X))
      X_shuffled = X[shuffled_indices]
      y_shuffled = y[shuffled_indices]
      for k in range(0, len(X), batch_size):
          X_mini = X_shuffled[k:k + batch_size]
          y_mini = y_shuffled[k:k + batch_size]
          b_grad = 0.0
          w_grad = 0.0
          for n in range(len(X_mini)):
              b_grad = b_grad - 2 * (y_mini[n, 0] - b - w * X_mini[n, 0]) * 1.0
              w_grad = w_grad - 2 * (y_mini[n, 0] - b - w * X_mini[n, 0]) * X_mini[n, 0]
          b_grad /= len(X_mini)
          w_grad /= len(X_mini)
          b = b - lr * b_grad
          w = w - lr * w_grad
          b_history.append(b)
          w_history.append(w)
          train_mse.append(cal_mse(X, y, b, w))
  return b_history, w_history, train_mse


def gradient_descent(n_samples=100, intercept=4, slope=3, intercept_random=4, slope_random=3, gradient_descent=False, gradient_descent_type='Batch GradientDescent', learning_rate=0.01, iteration=100, mini_batchsize=32):
  """Simulate a 1-D linear dataset, fit a reference model, optionally run
  gradient descent from a user-chosen starting line, and save a 3-panel
  figure (data + lines, loss contour, learning curve) to 'plot_line.png'.

  Parameters mirror the Gradio widgets:
    n_samples: number of simulated points.
    intercept, slope: true parameters of the generating line.
    intercept_random, slope_random: starting parameters of the "random"
      line and of gradient descent.
    gradient_descent: bool; run the selected descent variant when True.
      (Fix: the old default was the truthy string 'False', which made the
      flag effectively always on and crashed with a NameError on
      `train_mse` under pure defaults.)
    gradient_descent_type: 'Batch GradientDescent',
      'Stochastic GradientDescent' or 'Mini-Batch GradientDescent'.
    learning_rate, iteration: descent hyper-parameters.
    mini_batchsize: batch size for the mini-batch variant, capped at
      n_samples.

  Returns:
    The path of the saved figure ('plot_line.png').
  """
  # A mini-batch cannot be larger than the dataset.
  if n_samples < mini_batchsize:
      mini_batchsize = n_samples

  ### (1) generate simulated data points: y = intercept + slope*x + noise
  X = 2 * np.random.rand(n_samples, 1)
  y = intercept + slope * X + np.random.randn(n_samples, 1)

  ### (2) fit reference regression model
  lin_reg = LinearRegression()
  lin_reg.fit(X, y)

  ### (3) predictions on the training data (used for the best-fit line)
  y_predict = lin_reg.predict(X)

  ### (4) Draw data and the line of best fit
  fig = plt.figure(figsize=(12, 18))
  plt.subplot(3, 1, 1)
  plt.plot(X, y_predict, "r-", linewidth=2, label="Line of best fit")
  plt.plot(X, y, "b.")

  # Fix: always define the histories so the plotting code below can never
  # hit a NameError (the original crashed when gradient descent was
  # requested but the random line coincided with the baseline, or when the
  # dropdown type matched no branch).
  b_history, w_history, train_mse = [], [], []

  ### (4.2) Draw the random line (skipped when it overlaps the baseline)
  if intercept_random != intercept or slope_random != slope:
    X_new = np.array([[0], [2]])
    X_new_b = np.c_[np.ones((2, 1)), X_new]  # add x0 = 1 to each instance
    y_random_line = X_new_b.dot(np.array([intercept_random, slope_random]))
    plt.plot(X_new, y_random_line, "g-", linewidth=2, label="Random line")

    ### (4.3) Apply gradient descent starting from the random line
    if gradient_descent:
      # Fix: mutually exclusive elif chain (the original used a bare `if`
      # for the mini-batch branch).
      if gradient_descent_type == 'Batch GradientDescent':
          b_history, w_history, train_mse = _run_batch_gd(
              X, y, intercept_random, slope_random, learning_rate, iteration)
      elif gradient_descent_type == 'Stochastic GradientDescent':
          b_history, w_history, train_mse = _run_stochastic_gd(
              X, y, intercept_random, slope_random, learning_rate, iteration)
      elif gradient_descent_type == 'Mini-Batch GradientDescent':
          b_history, w_history, train_mse = _run_minibatch_gd(
              X, y, intercept_random, slope_random, learning_rate, iteration,
              mini_batchsize)

  plt.xlabel("$x_1$", fontsize=22)
  plt.ylabel("$y$", rotation=0, fontsize=22)
  plt.xticks(fontsize=18)
  plt.yticks(fontsize=18)
  plt.axis([np.min(X)*0.1, np.max(X)*1.1, np.min(y)*0.1, np.max(y)*1.1])
  plt.title("Linear Regression model predictions", fontsize=22)
  plt.legend(fontsize=18)
  plt.xlim(0, 2)
  plt.ylim(-10, 10)

  ### (5) Visualize loss function
  plt.subplot(3, 1, 2)

  ### (5.1) generate grid of parameters
  b = np.arange(-10, 10, 0.1)  # bias / intercept axis
  w = np.arange(-10, 10, 0.1)  # weight / slope axis

  ### (5.2) Calculate MSE over parameters
  Z = np.zeros((len(w), len(b)))
  for i in range(len(w)):
      for j in range(len(b)):
          Z[i][j] = cal_mse(X, y, b[j], w[i])

  ### (5.3) Get optimal parameters
  theta0_best = lin_reg.intercept_[0]
  theta1_best = lin_reg.coef_[0][0]

  ### (5.4) Draw the contour graph
  plt.contourf(b, w, Z, 50, alpha=0.5, cmap=plt.get_cmap('jet'))

  ### (5.5) Add optimal loss
  plt.plot(theta0_best, theta1_best, 'x', ms=12, markeredgewidth=3, color='orange')
  plt.text(theta0_best, theta1_best,'MSE:'+str(np.round(cal_mse(X,y,theta0_best, theta1_best),2)), color='red', fontsize=22)

  ### (5.6) Add loss of random lines
  if intercept_random != intercept or slope_random != slope:
    plt.plot(intercept_random, slope_random, 'o', ms=5, markeredgewidth=3, color='orange')
    plt.text(intercept_random, slope_random,'MSE:'+str(np.round(cal_mse(X,y,intercept_random, slope_random),2)), fontsize=22)

    ### (5.7) draw gradient updates
    if gradient_descent:
      plt.plot(b_history, w_history, 'o-', ms=3, lw=1.5, color='black')
      plt.title("Visualization of Gradient Descent Process ("+gradient_descent_type+")", fontsize=22)
    else:
      plt.title("Visualization of Loss Function Map", fontsize=22)
  else:
    plt.title("Visualization of Loss Function Map", fontsize=22)
  plt.xlabel("$Intercept$", fontsize=22)
  plt.ylabel("$Slope$", rotation=0, fontsize=22)
  plt.xticks(fontsize=18)
  plt.yticks(fontsize=18)
  plt.xlim(-10, 10)
  plt.ylim(-10, 10)

  ### (6) Visualize the learning curve
  if gradient_descent:
      plt.subplot(3, 1, 3)
      plt.plot(train_mse, label="train_loss (lr="+str(learning_rate)+")")
      plt.xlabel('Iteration', fontweight="bold", fontsize=22)
      plt.ylabel('Loss', fontweight="bold", fontsize=22)
      plt.title("Learning curve: Loss VS Epochs", fontweight="bold", fontsize=22)
      plt.legend(fontsize=18)
      plt.xticks(fontsize=18)
      plt.yticks(fontsize=18)

  fig.tight_layout()
  plt.savefig('plot_line.png', dpi=300)
  return 'plot_line.png'


#### Input widgets — one per gradient_descent() argument
input_sample = gr.Slider(minimum=1, maximum=5000, step=50, value=100, label='N samples')
input_intercept = gr.Slider(minimum=1, maximum=8, step=0.5, value=4, label='(Baseline) Intercept')
input_slope = gr.Slider(minimum=-8, maximum=8, step=0.5, value=2.8, label='(Baseline) Slope')

input_intercept_random = gr.Slider(minimum=-8, maximum=8, step=0.5, value=-7.5, label='(Random) Intercept')
input_slope_random = gr.Slider(minimum=-8, maximum=8, step=0.5, value=7.5, label='(Random) Slope')

input_gradients = gr.Checkbox(label="Apply Gradient Descent")
input_gradients_type = gr.Dropdown(
    ['Batch GradientDescent', 'Stochastic GradientDescent', 'Mini-Batch GradientDescent'],
    label="Type of Gradient Descent",
)

input_batchsize = gr.Slider(minimum=1, maximum=64, step=1, value=32, label='Batch size for Mini-BatchGD')
input_learningrate = gr.Slider(minimum=0, maximum=2, step=0.001, value=0.001, label='Learning Rate')
input_iteration = gr.Slider(minimum=1, maximum=1000, step=2, value=100, label='Iteration')

#### Output widget
output_plot1 = gr.Image(label="Regression plot")

#### Wire widgets to the callback; Gradio API docs: https://www.gradio.app/docs/#i_slider
interface = gr.Interface(
    fn=gradient_descent,
    inputs=[
        input_sample,
        input_intercept,
        input_slope,
        input_intercept_random,
        input_slope_random,
        input_gradients,
        input_gradients_type,
        input_learningrate,
        input_iteration,
        input_batchsize,
    ],
    outputs=[output_plot1],
    examples_per_page=2,
    title="ML Demo: Regression models (Batch/Mini-Batch/Stochastic Gradient Descent)",
    description="Click examples to generate random dataset and select gradient descent parameters",
    theme='huggingface',
)

interface.launch(debug=True)