File size: 6,377 Bytes
1062a59
8081430
 
36ff860
 
 
8081430
 
 
 
 
 
 
 
 
 
 
 
 
36ff860
8081430
 
 
 
 
 
 
 
 
 
 
 
 
36ff860
8081430
36ff860
8081430
 
 
 
 
 
 
 
 
 
 
36ff860
8081430
 
 
 
 
 
36ff860
8081430
 
 
 
 
36ff860
8081430
 
 
 
 
36ff860
 
8081430
 
 
 
 
 
 
 
 
 
36ff860
 
8081430
 
 
 
 
 
 
 
 
 
 
 
 
 
36ff860
8081430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce0a2c6
 
8081430
 
ce0a2c6
 
 
8081430
 
b95ad10
 
8081430
 
 
 
 
 
 
 
 
 
36ff860
 
 
 
 
 
 
 
 
 
 
 
 
8081430
 
 
 
 
 
 
36ff860
 
 
8081430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4caab07
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
### CSCI 4750/5750: regression models and gradient descent


### CSCI 4750/5750: regression models


import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression

def cal_mse(X,y,b,w):
  """Return the mean squared error of the line ``b + w * x`` on the data.

  Args:
    X: feature values, one per sample; accepted as a 1-D sequence or as an
      (n, 1) column.
    y: target values, one per sample; accepted as a 1-D sequence or as an
      (n, 1) column.
    b: intercept of the line being scored.
    w: slope of the line being scored.

  Returns:
    The mean of the squared residuals ``(b + w * x - y) ** 2`` as a NumPy
    float.
  """
  # Force both inputs into (n, 1) columns. Without this, a 1-D ``y`` combined
  # with an (n, 1) prediction broadcasts to an (n, n) matrix and the mean is
  # taken over all pairs of samples instead of over matching samples.
  X_col = np.asarray(X, dtype=float).reshape(-1, 1)
  y_col = np.asarray(y, dtype=float).reshape(-1, 1)

  residuals = b + w * X_col - y_col
  return np.mean(residuals ** 2)

def _padded_limits(values, pad=0.1):
  """Return (low, high) axis limits that enclose *values* with a margin.

  The margin is ``pad`` times the data range, so the limits stay on the
  outside of the data whether the values are positive or negative.
  """
  low = float(np.min(values))
  high = float(np.max(values))
  margin = pad * (high - low)
  return low - margin, high + margin


def _run_gradient_descent(X, y, b, w, lr, n_iter):
  """Run batch gradient descent for the line ``b + w * x`` on (X, y).

  Args:
    X, y: (n, 1) arrays of features and targets.
    b, w: starting intercept and slope.
    lr: learning rate (step size).
    n_iter: number of update steps.

  Returns:
    ``(b_history, w_history, train_mse)``: the intercepts and slopes visited
    (including the starting point) and the MSE recorded after every update.
  """
  b_history = [b]
  w_history = [w]
  train_mse = []

  for _ in range(n_iter):
    # Gradient of 0.5 * MSE with respect to b and w, averaged over samples.
    residual = y - b - w * X
    b_grad = -np.mean(residual)
    w_grad = -np.mean(residual * X)

    # Update parameters.
    b = b - lr * b_grad
    w = w - lr * w_grad

    # Store parameters and loss for plotting.
    b_history.append(b)
    w_history.append(w)
    train_mse.append(cal_mse(X, y, b, w))

  return b_history, w_history, train_mse


def gradient_descent(intercept=4, slope=3, intercept_random=4, slope_random=3, gradient_descent=False, learning_rate= 0.01, iteration=100):
  """Simulate a noisy linear dataset and plot regression / gradient descent.

  The figure has up to three stacked panels:
    1. the simulated points, the least-squares fit, and (when it differs
       from the baseline) the user-chosen "random" line;
    2. a filled contour plot of the MSE over a grid of intercepts and slopes;
    3. the training-loss curve, drawn only when gradient descent is enabled.

  Args:
    intercept: intercept of the line the data is generated around.
    slope: slope of the line the data is generated around.
    intercept_random: intercept of the random line; also the starting
      intercept for gradient descent.
    slope_random: slope of the random line; also the starting slope for
      gradient descent.
    gradient_descent: when truthy, run gradient descent and draw panel 3.
    learning_rate: step size of each gradient descent update.
    iteration: number of gradient descent updates.

  Returns:
    The path of the saved image, ``'plot_line.png'``.
  """
  ### (1) generate simulated data points
  X = 2 * np.random.rand(100, 1)
  y = intercept + slope * X + np.random.randn(100, 1)

  ### (2) fit regression model and predict on the training data
  lin_reg = LinearRegression()
  lin_reg.fit(X, y)
  y_fit = lin_reg.predict(X)

  ### (3) run gradient descent from the random line's parameters
  # This runs whenever it is requested, even if the random line coincides
  # with the baseline, so the learning-curve panel always has data to plot.
  if gradient_descent:
    # The UI may deliver the iteration count as a float (presumably from the
    # slider widget -- confirm); range() needs an int.
    b_history, w_history, train_mse = _run_gradient_descent(
        X, y, intercept_random, slope_random, learning_rate, int(iteration))

  fig = plt.figure(figsize=(12,18))
  try:
    ### (4) Draw baseline linear line
    plt.subplot(3,1,1)
    plt.plot(X, y_fit, "r-", linewidth=2, label = "Line of best fit")
    plt.plot(X, y, "b.")

    ### (4.2) Draw random line
    if intercept_random != intercept or slope_random != slope: #avoid overlap
      X_new = np.array([[0], [2]])
      y_random = intercept_random + slope_random * X_new
      plt.plot(X_new, y_random, "g-", linewidth=2, label = "Random line")

    plt.xlabel("$x_1$", fontsize=22)
    plt.ylabel("$y$", rotation=0, fontsize=22)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    # Pad by a fraction of the data range: scaling min/max by constant
    # factors would cut the data off whenever the values are negative.
    x_low, x_high = _padded_limits(X)
    y_low, y_high = _padded_limits(y)
    plt.axis([x_low, x_high, y_low, y_high])
    plt.title("Linear Regression model predictions", fontsize=22)
    plt.legend(fontsize=18)

    ### (5) Visualize loss function
    plt.subplot(3,1,2)

    ### (5.1) generate grid of parameters
    b_grid = np.arange(-10,10,0.1) #bias
    w_grid = np.arange(-10,10,0.1) #weight

    ### (5.2) Calculate MSE over parameters
    # Z[i, j] is the MSE for slope w_grid[i] and intercept b_grid[j]. Each row
    # is computed in one shot: (n, 1) residuals broadcast against the
    # (1, len(b_grid)) intercepts, then averaged over the n samples.
    Z = np.zeros((len(w_grid), len(b_grid)))
    for i, w0 in enumerate(w_grid):
      Z[i, :] = np.mean((b_grid[np.newaxis, :] + w0 * X - y) ** 2, axis=0)

    ### (5.3) Get optimal parameters (only used by the disabled overlays below)
    theta0_best = lin_reg.intercept_[0]
    theta1_best = lin_reg.coef_[0][0]

    ### (5.4) Draw the contour graph
    plt.contourf(b_grid, w_grid, Z, 50, alpha=0.5, cmap=plt.get_cmap('jet'))

    # NOTE: the overlays below are intentionally left disabled.
    ### (5.5) Add optimal loss
    #plt.plot(theta0_best, theta1_best, 'x', ms=12, markeredgewidth=3, color='orange')
    #plt.text(theta0_best, theta1_best,'MSE:'+str(np.round(cal_mse(X,y,theta0_best, theta1_best),2)), color='red', fontsize=22)

    ### (5.6) Add loss of random lines
    #if intercept_random != intercept or slope_random != slope: #avoid overlap
      #plt.plot(intercept_random, slope_random, 'o', ms=5, markeredgewidth=3, color='orange')
      #plt.text(intercept_random, slope_random,'MSE:'+str(np.round(cal_mse(X,y,intercept_random, slope_random),2)), fontsize=22)

    ### (5.7) draw gradient updates
    #if gradient_descent:
    #  plt.plot(b_history, w_history, 'o-', ms=3, lw=1.5, color='black')

    plt.title("Visualization of Gradient Descent Process", fontsize=22)
    plt.xlabel("$Intercept$", fontsize=22)
    plt.ylabel("$Slope$", rotation=0, fontsize=22)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.xlim(-10,10)
    plt.ylim(-10,10)

    ### (6) Visualize the learning curves
    if gradient_descent:
      plt.subplot(3,1,3)
      plt.plot(train_mse,label="train_loss (lr="+str(learning_rate)+")")
      plt.xlabel('Iteration',fontweight="bold",fontsize = 22)
      plt.ylabel('Loss',fontweight="bold",fontsize = 22)
      plt.title("Learning curve: Loss VS Epochs",fontweight="bold",fontsize = 22)
      plt.legend(fontsize=18)
      plt.xticks(fontsize=18)
      plt.yticks(fontsize=18)

    fig.tight_layout()
    plt.savefig('plot_line.png', dpi=300)
  finally:
    # pyplot keeps every figure alive until it is closed; release this one so
    # repeated calls do not pile up figures in memory.
    plt.close(fig)

  return 'plot_line.png'






#### Input widgets (listed later in the same order as gradient_descent's parameters)

# Intercept and slope of the line the simulated data is generated around.
input_intercept = gr.inputs.Slider(
    1, 8, step=0.5, default=4, label='(Baseline) Intercept'
)
input_slope = gr.inputs.Slider(
    -8, 8, step=0.5, default=2.8, label='(Baseline) Slope'
)

# Intercept and slope of the user-chosen "random" line.
input_intercept_random = gr.inputs.Slider(
    -8, 8, step=0.5, default=-7.5, label='(Random) Intercept'
)
input_slope_random = gr.inputs.Slider(
    -8, 8, step=0.5, default=-4.5, label='(Random) Slope'
)

# Gradient descent switch and its hyper-parameters.
input_gradients = gr.inputs.Checkbox(label="Apply Gradient Descent")
input_learningrate = gr.inputs.Slider(
    0, 2, step=0.0001, default=0.001, label='Learning Rate'
)
input_interation = gr.inputs.Slider(
    1, 1000, step=2, default=100, label='Iteration'
)


#### Output widget: shows the image file whose path gradient_descent returns
output_plot1 = gr.outputs.Image(label="Regression plot")


#### Assemble the interface; details can be found at https://www.gradio.app/docs/#i_slider
demo_inputs = [
    input_intercept,
    input_slope,
    input_intercept_random,
    input_slope_random,
    input_gradients,
    input_learningrate,
    input_interation,
]

# Each example row supplies one value per input widget, in the order above.
demo_examples = [
    [4, 3, -7, -5, True, 0.0001, 100],
    [1, 2, -7, -8, False, 0.0001, 100],
]

interface = gr.Interface(
    fn=gradient_descent,
    inputs=demo_inputs,
    outputs=[output_plot1],
    examples_per_page=2,
    examples=demo_examples,
    title="CSCI4750/5750: Regression models (Gradient Descent)",
    description="Click examples to generate random dataset and select gradient descent parameters",
    theme='huggingface',
    layout='vertical',
)

interface.launch(debug=True)