### CSCI 4750/5750: regression models and gradient descent
### Gradio demo: fit a line with sklearn, then visualize how batch gradient
### descent walks from a user-chosen (intercept, slope) toward the optimum.

import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression


def cal_mse(X, y, b, w):
    """Mean squared error of the candidate line y_hat = b + w*x on (X, y).

    Args:
        X: ndarray of shape (n, 1), feature values.
        y: ndarray of shape (n, 1), targets.
        b: intercept of the candidate line.
        w: slope of the candidate line.

    Returns:
        float: mean of (y_hat - y)**2 over all n samples.
    """
    thetas = np.array([[b], [w]])
    X_b = np.c_[np.ones((len(X), 1)), X]  # add x0 = 1 to each instance
    y_predict = X_b.dot(thetas)
    return np.mean((y_predict - y) ** 2)


def _draw_line(b, w, fmt, label):
    """Plot the line y = b + w*x across x in [0, 2] on the current axes.

    `fmt` is passed straight to plt.plot, so it may be either a format
    string ("g-") or a named color ("brown"), exactly as before.
    """
    X_line = np.array([[0], [2]])
    X_line_b = np.c_[np.ones((2, 1)), X_line]  # add x0 = 1 to each instance
    y_line = X_line_b.dot(np.array([b, w]))
    plt.plot(X_line, y_line, fmt, linewidth=2, label=label)


def gradient_descent(intercept=4, slope=3, intercept_random=4, slope_random=3,
                     gradient_descent=False, learning_rate=0.01, iteration=100):
    """Build the three-panel figure shown in the Gradio app.

    Panel 1: simulated data, sklearn's best-fit line, the user's "random"
    line, and (optionally) snapshots of the gradient-descent fit.
    Panel 2: MSE contour over (intercept, slope) with the optimum, the
    random starting point, and (optionally) the descent trajectory.
    Panel 3 (only when gradient descent is enabled): the learning curve.

    Args:
        intercept, slope: true parameters used to simulate the data.
        intercept_random, slope_random: user-chosen starting line.
        gradient_descent: if True, run batch gradient descent from the
            random line. (Name shadows the function inside the body; kept
            for backward compatibility with the Gradio wiring below.)
        learning_rate: step size for gradient descent.
        iteration: number of gradient-descent iterations.

    Returns:
        str: path of the saved figure ('plot_line.png').
    """
    ### (1) generate simulated data points
    X = 2 * np.random.rand(100, 1)
    y = intercept + slope * X + np.random.randn(100, 1)

    ### (2) fit regression model
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)

    ### (3) make a prediction on training data
    y_predict = lin_reg.predict(X)

    ### (4) draw baseline line over the data
    fig = plt.figure(figsize=(12, 20))
    plt.subplot(3, 1, 1)
    plt.plot(X, y_predict, "r-", linewidth=2, label="Line of best fit")
    plt.plot(X, y, "b.")

    ### (4.2) draw the random line (skipped when identical, to avoid overlap)
    if intercept_random != intercept or slope_random != slope:
        _draw_line(intercept_random, slope_random, "g-", "Random line")

    ### (4.3) apply gradient descent from the random line
    if gradient_descent:
        b = intercept_random
        w = slope_random
        lr = learning_rate
        # Parameter trajectory (includes the starting point) and loss history.
        b_history = [b]
        w_history = [w]
        train_mse = []
        # Iterations at which a snapshot of the current fit is drawn.
        # Kept as a list of pairs: if two indices coincide (tiny `iteration`),
        # both snapshots are drawn, matching the original behavior.
        snapshots = [(int(iteration / 4), "brown"),
                     (int(iteration / 3), "blue"),
                     (int(iteration / 2), "gray"),
                     (iteration - 1, "black")]
        for i in range(iteration):
            # Batch gradients, vectorized over all samples.
            # NOTE: these are the gradients of MSE/2 — the missing factor 2
            # simply rescales the effective learning rate (as in the original).
            err = y[:, 0] - b - w * X[:, 0]
            b_grad = -np.mean(err)
            w_grad = -np.mean(err * X[:, 0])
            # Update parameters.
            b = b - lr * b_grad
            w = w - lr * w_grad
            # Store parameters and loss for plotting.
            b_history.append(b)
            w_history.append(w)
            train_mse.append(cal_mse(X, y, b, w))
            for snap_i, snap_color in snapshots:
                if i == snap_i:
                    _draw_line(b, w, snap_color,
                               "Fitted line in iteration " + str(i))

    plt.xlabel("$x_1$", fontsize=22)
    plt.ylabel("$y$", rotation=0, fontsize=22)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    # NOTE(review): the *0.1 on the lower bounds looks like it was meant to
    # be *0.9 (a 10% margin); kept as-is to preserve the original view.
    plt.axis([np.min(X) * 0.1, np.max(X) * 1.1,
              np.min(y) * 0.1, np.max(y) * 1.1])
    plt.title("Linear Regression model predictions", fontsize=22)
    plt.legend(fontsize=18)

    ### (5) visualize the loss function
    plt.subplot(3, 1, 2)

    ### (5.1) generate grid of parameters
    b_grid = np.arange(-10, 10, 0.1)  # bias
    w_grid = np.arange(-10, 10, 0.1)  # weight

    ### (5.2) MSE over the whole grid, vectorized (replaces 40k cal_mse calls).
    # pred has shape (n_samples, len(w_grid), len(b_grid)); Z[i][j] is the
    # MSE of the line with slope w_grid[i] and intercept b_grid[j].
    B, W = np.meshgrid(b_grid, w_grid)
    pred = B[None, :, :] + W[None, :, :] * X[:, 0][:, None, None]
    Z = np.mean((pred - y[:, 0][:, None, None]) ** 2, axis=0)

    ### (5.3) get optimal parameters from the sklearn fit
    theta0_best = lin_reg.intercept_[0]
    theta1_best = lin_reg.coef_[0][0]

    ### (5.4) draw the contour graph
    plt.contourf(b_grid, w_grid, Z, 50, alpha=0.5, cmap=plt.get_cmap('jet'))

    ### (5.5) mark the optimal loss
    plt.plot(theta0_best, theta1_best, 'x', ms=12, markeredgewidth=3,
             color='orange')
    plt.text(theta0_best, theta1_best,
             'MSE:' + str(np.round(cal_mse(X, y, theta0_best, theta1_best), 2)),
             color='red', fontsize=22)

    ### (5.6) mark the loss of the random line
    if intercept_random != intercept or slope_random != slope:  # avoid overlap
        plt.plot(intercept_random, slope_random, 'o', ms=5, markeredgewidth=3,
                 color='orange')
        plt.text(intercept_random, slope_random,
                 'MSE:' + str(np.round(cal_mse(X, y, intercept_random,
                                               slope_random), 2)),
                 fontsize=22)

    ### (5.7) draw the gradient-descent trajectory
    if gradient_descent:
        plt.plot(b_history, w_history, 'o-', ms=3, lw=1.5, color='black')

    plt.title("Visualization of Gradient Descent Process", fontsize=22)
    plt.xlabel("$Intercept$", fontsize=22)
    plt.ylabel("$Slope$", rotation=0, fontsize=22)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)

    ### (6) visualize the learning curve
    if gradient_descent:
        plt.subplot(3, 1, 3)
        plt.plot(train_mse, label="train_loss (lr=" + str(learning_rate) + ")")
        plt.xlabel('Iteration', fontweight="bold", fontsize=22)
        plt.ylabel('Loss', fontweight="bold", fontsize=22)
        plt.title("Learning curve: Loss VS Epochs", fontweight="bold",
                  fontsize=22)
        plt.legend(fontsize=18)
        plt.xticks(fontsize=18)
        plt.yticks(fontsize=18)

    fig.tight_layout()
    plt.savefig('plot_line.png', dpi=300)
    return 'plot_line.png'


#### Define input components
# NOTE(review): gr.inputs/gr.outputs is the legacy Gradio API (deprecated in
# Gradio 3.x, removed in 4.x); kept to match the installed version.
input_intercept = gr.inputs.Slider(1, 8, step=0.5, default=4,
                                   label='(Baseline) Intercept')
input_slope = gr.inputs.Slider(-8, 8, step=0.5, default=2.8,
                               label='(Baseline) Slope')
input_intercept_random = gr.inputs.Slider(-8, 8, step=0.5, default=4,
                                          label='(Random) Intercept')
input_slope_random = gr.inputs.Slider(-8, 8, step=0.5, default=-4.5,
                                      label='(Random) Slope')
input_gradients = gr.inputs.Checkbox(label="Apply Gradient Descent")
input_learningrate = gr.inputs.Slider(0, 1, step=0.001, default=0.005,
                                      label='Learning Rate')
input_iteration = gr.inputs.Slider(1, 1000, step=2, default=150,
                                   label='Iteration')

#### Define output component
output_plot1 = gr.outputs.Image(label="Regression plot")

### configure gradio; details at https://www.gradio.app/docs/#i_slider
interface = gr.Interface(fn=gradient_descent,
                         inputs=[input_intercept, input_slope,
                                 input_intercept_random, input_slope_random,
                                 input_gradients, input_learningrate,
                                 input_iteration],
                         outputs=[output_plot1],
                         examples_per_page=2,
                         examples=[[4, 3, -7, -5, True, 0.0001, 100],
                                   [1, 2, -7, -8, False, 0.0001, 100]],
                         title="CSCI4750/5750: Regression models \n "
                               "(Function approximation by Gradient Descent)",
                         description="Click examples to generate random "
                                     "dataset and select gradient descent "
                                     "parameters",
                         theme='huggingface',
                         layout='vertical')

interface.launch(debug=True)