| |
| |
|
|
| import gradio as gr |
| import matplotlib |
| import matplotlib.pyplot as plt |
| import numpy as np |
| from sklearn.linear_model import LinearRegression |
|
|
def cal_mse(X, y, b, w):
    """Mean squared error of the line y_hat = b + w*x over the dataset.

    X, y are column vectors of shape (n, 1); b is the intercept and w the
    slope. Returns the scalar mean of the squared residuals.
    """
    residuals = (b + w * X) - y
    return np.mean(residuals ** 2)
|
|
def _batch_gd(X, y, b, w, lr, n_iters):
    """Full-batch gradient descent on the MSE of y_hat = b + w*x.

    Returns (b_history, w_history, mse_history); the histories include the
    starting point, and mse_history holds one entry per iteration.
    """
    b_history, w_history, mse_history = [b], [w], []
    x_col, y_col = X[:, 0], y[:, 0]
    for _ in range(n_iters):
        # d/db mean((y - b - w*x)^2) = -2*mean(err); d/dw = -2*mean(err*x)
        err = y_col - b - w * x_col
        b = b - lr * (-2.0 * err.mean())
        w = w - lr * (-2.0 * (err * x_col).mean())
        b_history.append(b)
        w_history.append(w)
        mse_history.append(cal_mse(X, y, b, w))
    return b_history, w_history, mse_history


def _stochastic_gd(X, y, b, w, lr, n_iters):
    """Stochastic gradient descent: one randomly sampled point per update.

    Each iteration performs len(X) single-sample updates (sampled with
    replacement); histories are appended per update, mse once per iteration.
    """
    b_history, w_history, mse_history = [b], [w], []
    n = len(X)
    for _ in range(n_iters):
        for _ in range(n):
            idx = np.random.randint(n)
            err = y[idx, 0] - b - w * X[idx, 0]
            b = b - lr * (-2.0 * err)
            w = w - lr * (-2.0 * err * X[idx, 0])
            b_history.append(b)
            w_history.append(w)
        mse_history.append(cal_mse(X, y, b, w))
    return b_history, w_history, mse_history


def _minibatch_gd(X, y, b, w, lr, n_iters, batch_size):
    """Mini-batch gradient descent: reshuffle each epoch, update per batch."""
    b_history, w_history, mse_history = [b], [w], []
    n = len(X)
    for _ in range(n_iters):
        perm = np.random.permutation(n)
        X_shuffled, y_shuffled = X[perm], y[perm]
        for start in range(0, n, batch_size):
            xb = X_shuffled[start:start + batch_size, 0]
            yb = y_shuffled[start:start + batch_size, 0]
            err = yb - b - w * xb
            b = b - lr * (-2.0 * err.mean())
            w = w - lr * (-2.0 * (err * xb).mean())
            b_history.append(b)
            w_history.append(w)
        mse_history.append(cal_mse(X, y, b, w))
    return b_history, w_history, mse_history


# Dropdown values accepted by gradient_descent().
_GD_TYPES = ('Batch GradientDescent', 'Stochastic GradientDescent',
             'Mini-Batch GradientDescent')


def gradient_descent(n_samples=100, intercept=4, slope=3, intercept_random=4, slope_random=3, gradient_descent=False, gradient_descent_type='Batch GradientDescent', learning_rate=0.01, iteration=100, mini_batchsize=32):
    """Build the three-panel regression/gradient-descent figure for Gradio.

    Generates a noisy linear dataset, fits it with sklearn's closed-form
    LinearRegression, and renders:
      1. the data, the best-fit line, and optionally a user-chosen
         "random" line (intercept_random, slope_random);
      2. the MSE loss surface over (intercept, slope), with the optimum,
         the random starting point, and the gradient-descent path;
      3. the training-loss learning curve (only when descent is enabled).

    Parameters mirror the Gradio widgets: gradient_descent toggles the
    descent overlay, gradient_descent_type selects the variant (one of
    _GD_TYPES), learning_rate/iteration/mini_batchsize configure it.

    Returns the path of the saved figure ('plot_line.png').

    BUG FIX: the default for gradient_descent used to be the *string*
    'False', which is truthy and silently enabled descent for plain calls;
    it is now the boolean False.
    """
    if n_samples < mini_batchsize:
        mini_batchsize = n_samples  # a mini-batch cannot exceed the dataset

    # Synthetic data: x ~ U[0, 2), y = intercept + slope*x + N(0, 1) noise.
    X = 2 * np.random.rand(n_samples, 1)
    y = intercept + slope * X + np.random.randn(n_samples, 1)

    # Closed-form least-squares fit used as the reference "best" line.
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)
    y_predict = lin_reg.predict(X)

    fig = plt.figure(figsize=(12, 18))

    # --- Panel 1: data, best-fit line, optional random line -------------
    plt.subplot(3, 1, 1)
    plt.plot(X, y_predict, "r-", linewidth=2, label="Line of best fit")
    plt.plot(X, y, "b.")

    if intercept_random != intercept or slope_random != slope:
        X_new = np.array([[0], [2]])
        X_new_b = np.c_[np.ones((2, 1)), X_new]
        y_random = X_new_b.dot(np.array([intercept_random, slope_random]))
        plt.plot(X_new, y_random, "g-", linewidth=2, label="Random line")

    # BUG FIX: histories are always defined so later plotting code can
    # never hit a NameError (previously they only existed when one of the
    # three recognized descent types matched).
    b_history, w_history, train_mse = [intercept_random], [slope_random], []
    if gradient_descent:
        if gradient_descent_type not in _GD_TYPES:
            # Unset/unknown dropdown value: fall back to the default
            # variant instead of crashing (None also breaks the title).
            gradient_descent_type = 'Batch GradientDescent'
        if gradient_descent_type == 'Stochastic GradientDescent':
            b_history, w_history, train_mse = _stochastic_gd(
                X, y, intercept_random, slope_random, learning_rate, iteration)
        elif gradient_descent_type == 'Mini-Batch GradientDescent':
            b_history, w_history, train_mse = _minibatch_gd(
                X, y, intercept_random, slope_random, learning_rate, iteration, mini_batchsize)
        else:
            b_history, w_history, train_mse = _batch_gd(
                X, y, intercept_random, slope_random, learning_rate, iteration)

    plt.xlabel("$x_1$", fontsize=22)
    plt.ylabel("$y$", rotation=0, fontsize=22)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.title("Linear Regression model predictions", fontsize=22)
    plt.legend(fontsize=18)
    # Fixed view window (a former plt.axis(...) call was dead code: it was
    # immediately overridden by these limits and has been removed).
    plt.xlim(0, 2)
    plt.ylim(-10, 10)

    # --- Panel 2: MSE loss surface over (intercept, slope) --------------
    plt.subplot(3, 1, 2)
    b_grid = np.arange(-10, 10, 0.1)
    w_grid = np.arange(-10, 10, 0.1)

    # mean((b + w*x - y)^2) expands to
    #   b^2 + w^2*E[x^2] + E[y^2] + 2bw*E[x] - 2b*E[y] - 2w*E[xy],
    # so the whole grid is evaluated from five dataset moments instead of
    # one full pass over the data per grid cell (40,000 cells).
    ex = np.mean(X[:, 0])
    exx = np.mean(X[:, 0] ** 2)
    ey = np.mean(y[:, 0])
    eyy = np.mean(y[:, 0] ** 2)
    exy = np.mean(X[:, 0] * y[:, 0])
    B, W = np.meshgrid(b_grid, w_grid)  # shapes (len(w_grid), len(b_grid))
    Z = B ** 2 + (W ** 2) * exx + eyy + 2 * B * W * ex - 2 * B * ey - 2 * W * exy

    theta0_best = lin_reg.intercept_[0]
    theta1_best = lin_reg.coef_[0][0]

    plt.contourf(b_grid, w_grid, Z, 50, alpha=0.5, cmap=plt.get_cmap('jet'))
    plt.plot(theta0_best, theta1_best, 'x', ms=12, markeredgewidth=3, color='orange')
    plt.text(theta0_best, theta1_best, 'MSE:' + str(np.round(cal_mse(X, y, theta0_best, theta1_best), 2)), color='red', fontsize=22)

    if intercept_random != intercept or slope_random != slope:
        plt.plot(intercept_random, slope_random, 'o', ms=5, markeredgewidth=3, color='orange')
        plt.text(intercept_random, slope_random, 'MSE:' + str(np.round(cal_mse(X, y, intercept_random, slope_random), 2)), fontsize=22)

        # NOTE: as in the original, the descent path is only drawn when the
        # random line differs from the baseline (descent starts there).
        if gradient_descent:
            plt.plot(b_history, w_history, 'o-', ms=3, lw=1.5, color='black')
            plt.title("Visualization of Gradient Descent Process (" + gradient_descent_type + ")", fontsize=22)
        else:
            plt.title("Visualization of Loss Function Map", fontsize=22)
    else:
        plt.title("Visualization of Loss Function Map", fontsize=22)
    plt.xlabel("$Intercept$", fontsize=22)
    plt.ylabel("$Slope$", rotation=0, fontsize=22)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)

    # --- Panel 3: learning curve (only when descent ran) ----------------
    if gradient_descent:
        plt.subplot(3, 1, 3)
        plt.plot(train_mse, label="train_loss (lr=" + str(learning_rate) + ")")
        plt.xlabel('Iteration', fontweight="bold", fontsize=22)
        plt.ylabel('Loss', fontweight="bold", fontsize=22)
        plt.title("Learning curve: Loss VS Epochs", fontweight="bold", fontsize=22)
        plt.legend(fontsize=18)
        plt.xticks(fontsize=18)
        plt.yticks(fontsize=18)

    fig.tight_layout()
    plt.savefig('plot_line.png', dpi=300)
    return 'plot_line.png'
|
|
|
|
| |
# ---------------------------------------------------------------------------
# Gradio UI: widgets for the dataset, the random line, and the descent
# configuration, wired into gradient_descent() in signature order.
# ---------------------------------------------------------------------------

# Dataset controls.
slider_n_samples = gr.Slider(1, 5000, step=50, value=100, label='N samples')
slider_base_intercept = gr.Slider(1, 8, step=0.5, value=4, label='(Baseline) Intercept')
slider_base_slope = gr.Slider(-8, 8, step=0.5, value=2.8, label='(Baseline) Slope')

# Starting point for the "random" line / gradient descent.
slider_rand_intercept = gr.Slider(-8, 8, step=0.5, value=-7.5, label='(Random) Intercept')
slider_rand_slope = gr.Slider(-8, 8, step=0.5, value=7.5, label='(Random) Slope')

# Gradient-descent configuration.
checkbox_gd = gr.Checkbox(label="Apply Gradient Descent")
dropdown_gd_type = gr.Dropdown(['Batch GradientDescent', 'Stochastic GradientDescent', 'Mini-Batch GradientDescent'],label="Type of Gradient Descent")
slider_batch_size = gr.Slider(1, 64, step=1, value=32, label='Batch size for Mini-BatchGD')
slider_learning_rate = gr.Slider(0,2, step=0.001, value=0.001, label='Learning Rate')
slider_iterations = gr.Slider(1, 1000, step=2, value=100, label='Iteration')

# Output: the rendered three-panel figure.
image_output = gr.Image(label="Regression plot")

# NOTE(review): the description mentions clicking examples and
# examples_per_page is set, but no examples= list is provided — consider
# adding one or dropping the mention.
interface = gr.Interface(fn=gradient_descent,
                         inputs=[slider_n_samples, slider_base_intercept, slider_base_slope, slider_rand_intercept, slider_rand_slope, checkbox_gd, dropdown_gd_type, slider_learning_rate, slider_iterations, slider_batch_size],
                         outputs=[image_output],
                         examples_per_page = 2,
                         title="ML Demo: Regression models (Batch/Mini-Batch/Stochastic Gradient Descent)",
                         description= "Click examples to generate random dataset and select gradient descent parameters",
                         theme = 'huggingface',
                         )

interface.launch(debug=True)