Cippppy commited on
Commit
5f7815d
·
1 Parent(s): c9b0140

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +271 -0
app.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## CHOOSE BETWEEN ALTAIR & MATPLOTLIB
2
+
3
+ import gradio as gr
4
+ import altair as alt
5
+ import numpy as np
6
+ import pandas as pd
7
+ import matplotlib.pyplot as plt
8
+ import time
9
+
10
def make_plot(plot_type, a, epoch, progress=gr.Progress()):
    """Dispatch to the regression demo selected in the UI.

    Args:
        plot_type: "log" for logistic regression, "lin" for linear regression.
        a: learning rate passed through to the trainer.
        epoch: number of training iterations passed through to the trainer.
        progress: Gradio progress tracker forwarded to the worker function.

    Returns:
        The six-element output list produced by logReg/linReg, or None when
        plot_type is unrecognized (same as the original implicit fall-through).
    """
    if plot_type == "log":
        return logReg(a=a, epoch=epoch, progress=progress)
    if plot_type == "lin":
        return linReg(a=a, epoch=epoch, progress=progress)
    return None
15
+
16
+
17
# a = learning rate
# epoch = number of training iterations
def logReg(a, epoch, progress):
    """Train a 2-feature logistic regression with per-sample gradient descent
    on two synthetic Gaussian clusters, then render the decision boundary and
    the loss curve with both Altair and Matplotlib.

    Args:
        a: learning rate (gradient-descent step size).
        epoch: number of full passes over the 200-point data set.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        [altair boundary chart, altair loss chart, "plt1.png", "plt2.png",
         final loss value as str, fitted equation as str]
    """
    #### generate random data-set ####
    progress(0.2, desc="Generating Data")
    time.sleep(1)  # pause so the progress stage is visible in the UI
    # np.random.seed(0)  # set random seed (optional)

    ## set mean and covariance of our datasets
    mean1 = [20, 35]
    cov1 = [[100, 100], [-100, 100]]
    mean2 = [60, 70]
    cov2 = [[100, 100], [100, -100]]

    ## concatenate values to set x values for datasets
    x1, x2 = np.random.multivariate_normal(mean1, cov1, 100).T
    x_1, x_2 = np.random.multivariate_normal(mean2, cov2, 100).T
    x1 = (np.concatenate((x1, x_1), axis=0)) / 10
    x2 = (np.concatenate((x2, x_2), axis=0)) / 10

    ## set y values: indices 0-99 are class 0, indices 100-199 are class 1
    y1 = np.zeros(100)  # class we want the decision boundary to be above
    y2 = np.ones(100)   # class we want the decision boundary to be below
    y = np.concatenate((y1, y2), axis=0)

    # NOTE(review): the second element reads rand() + 0.0001/100, not
    # (rand() + 0.0001)/100 — likely a precedence slip, but it only affects
    # the random starting point, so it is left unchanged.
    w = np.matrix([(np.random.rand()) / 100, (np.random.rand()) + 0.0001 / 100])
    b = np.matrix([np.random.rand()])      # bias term, random start
    wb = np.concatenate((b, w), axis=1)    # single row [b, w1, w2]
    print('f = b + x1*w1 + x2*w2')
    print('Starting weights:', 'f = ', wb[0,0], '+ x1', wb[0,1], '+ x2', wb[0,2])

    loss = np.empty([epoch])               # per-epoch loss for plotting
    iterat = np.arange(epoch, dtype=float) # epoch numbers for the loss plot

    progress(0.5, desc="Finding Loss & Regression")
    time.sleep(1.5)

    for p in range(epoch):
        # Stochastic (per-sample) gradient descent over the whole data set.
        for i in range(len(x1)):
            xj = np.matrix([1, x1[i], x2[i]])  # [1, x1, x2] so wb*xj.T = b + w1*x1 + w2*x2
            y_hat = 1 / (1 + np.exp(-(wb * xj.T)))  # sigmoid prediction
            # cross-entropy loss of THIS sample (overwritten each iteration,
            # so loss[p] below records the last sample's loss, not an epoch sum)
            J = -((y[i] * np.log(y_hat)) + ((1 - y[i]) * np.log(1 - y_hat)))
            d_J = ((y_hat) - y[i]) * xj            # gradient ∂J(w)/∂w
            wb = wb - a * (d_J)                    # weight update

        loss[p] = J
        if ((p % 100) == 0):
            # BUG FIX: the original printed an accumulator `L` that was
            # initialized but never updated (always zeros); report the
            # actual most recent gradient instead.
            print('loss:', J, ' Gradient (∂J(w)/∂w) [[b, w1, w2]]:', d_J)
    print('Updated weights:', 'f = ', wb[0,0], '+ x1', wb[0,1], '+ x2', wb[0,2])
    equation = "f = {w1} + {w2}x1 + {w3}x2".format(w1=wb[0,0], w2=wb[0,1], w3=wb[0,2])

    ## Plot decision boundary and data

    progress(0.8, desc="Plotting Data")
    time.sleep(1.5)

    # BUG FIX: class 0 occupies indices 0-99 and class 1 indices 100-199;
    # the original slices [1:100] and [101:200] silently dropped one point
    # from each class.
    scatterData1 = pd.DataFrame({'x': x1[:100],
                                 'y': x2[:100]})
    scatterFig1 = alt.Chart(scatterData1).mark_point().encode(
        x='x:Q',
        y='y:Q'
    ).properties(
        title="Decision Boundary"
    )
    scatterData2 = pd.DataFrame({'x': x1[100:200],
                                 'y': x2[100:200]})
    scatterFig2 = alt.Chart(scatterData2).mark_point(color='green').encode(
        x='x:Q',
        y='y:Q',
    ).properties(
        title="Decision Boundary"
    )

    # boundary: wb0 + wb1*x1 + wb2*x2 = 0  =>  x2 = -(wb0 + wb1*x1)/wb2
    y2 = np.array(np.array(-(x1 * wb[0,1] + wb[0,0]) / wb[0,2], dtype=float))

    trendLine = pd.DataFrame({'x': x1.flatten(),
                              'y': y2.flatten()})
    trendLineFig = alt.Chart(trendLine).mark_line().encode(
        x='x:Q',
        y='y:Q'
    ).properties(
        title="Decision Boundary"
    )

    finalFig = scatterFig1 + scatterFig2 + trendLineFig

    # skip the first 100 epochs so the initial transient does not dominate
    # the loss plot (this chart is empty when epoch <= 100)
    lossData = pd.DataFrame({'Number of Iterations': iterat[100:],
                             'Loss Value': loss[100:]})
    lossFig = alt.Chart(lossData).mark_line().encode(
        x='Number of Iterations:Q',
        y='Loss Value:Q'
    ).properties(
        title='Plot of loss values over number of iterations'
    )

    fig1 = plt.figure()
    plt.plot(x1[:100], x2[:100], 'x', x1[100:200], x2[100:200], 'x')  # data points
    plt.plot(x1, -(x1 * wb[0,1] + wb[0,0]) / wb[0,2], linestyle='solid')  # decision boundary
    plt.axis('equal')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title('Decision Boundary')
    plt.savefig("plt1.png")
    plt.close(fig1)  # BUG FIX: figures were never closed, leaking one per call

    ## Plot training loss v epoch
    fig2 = plt.figure()
    plt.plot(iterat[100:], loss[100:], 'x')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss v Epoch')
    plt.savefig("plt2.png")
    plt.close(fig2)

    return [finalFig.interactive(), lossFig.interactive(), "plt1.png", "plt2.png", str(loss[len(loss)-1]), str(equation)]
143
+
144
# a = learning rate step size
# epoch = number of training iterations
def linReg(a, epoch, progress):
    """Fit f(x) = w0*x + w1 by per-sample (stochastic) gradient descent on
    100 random points scattered around y = 2 + 3x, then render the fitted
    line and the loss curve with both Altair and Matplotlib.

    Args:
        a: learning rate (gradient-descent step size).
        epoch: number of full passes over the 100-point data set.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        [altair regression chart, altair loss chart, "plt1.png", "plt2.png",
         final loss value as str, fitted equation as str]
    """
    # generate random data-set
    progress(0.2, desc="Generating Data")
    time.sleep(1)  # pause so the progress stage is visible in the UI
    # np.random.seed(0)  # choose random seed (optional)
    x = np.random.rand(100, 1)
    y = 2 + 3 * x + np.random.rand(100, 1)

    # BUG FIX: J is defined up front so the post-loop print cannot raise
    # NameError when epoch == 0.
    J = 0.0
    w = np.matrix([np.random.rand(), np.random.rand()])  # [slope, y-intercept]
    ite = epoch  # number of training iterations

    jList = []   # per-epoch loss values
    numIte = []  # matching epoch indices

    ## Linear regression: solve for w (slope and y-intercept) ##
    progress(0.5, desc="Finding Loss & Regression")
    time.sleep(1.5)

    for p in range(ite):
        for i in range(len(x)):
            x_vec = np.matrix([x[i][0], 1])  # [x, 1] pairs with w = [slope, intercept]
            h = w * x_vec.T                  # prediction for sample i
            w = w - a * (h - y[i]) * x_vec   # gradient step
            # squared error of THIS sample (overwritten each iteration, so
            # jList records the last sample's loss for the epoch)
            J = (1/2) * (((h - y[i])) ** 2)
            J = J.item()

        jList.append(J)
        numIte.append(p)
        print('Loss:', J)

    ## if done correctly the line should be in line with the data points ##

    print('f = ', w[0,0], 'x + ', w[0,1])
    equation = "f = {w1}x + {w2}".format(w1=w[0,0], w2=w[0,1])

    progress(0.8, desc="Plotting Data")
    time.sleep(1.5)
    y2 = np.array(np.array((w[0,1] + (w[0,0] * x)), dtype=float)).T  # fitted line values

    scatterData = pd.DataFrame({'x': x.flatten(),
                                'y': y.flatten()})
    scatterFig = alt.Chart(scatterData).mark_point().encode(
        x='x:Q',
        y='y:Q'
    ).properties(
        title='Plot of random data values with linear regression line'
    )

    trendLine = pd.DataFrame({'x': x.flatten(),
                              'y': y2.flatten()})
    trendLineFig = alt.Chart(trendLine).mark_line().encode(
        x='x:Q',
        y='y:Q'
    )

    finalFig = scatterFig + trendLineFig

    lossData = pd.DataFrame({'Number of Iterations': range(1, len(jList) + 1),
                             'Loss Value': jList})
    lossFig = alt.Chart(lossData).mark_line().encode(
        x='Number of Iterations:Q',
        y='Loss Value:Q'
    ).properties(
        title='Plot of loss values over number of iterations'
    )

    # plot
    # BUG FIX: the original reused global figure numbers plt.figure(1)/(2),
    # so successive calls drew on top of stale plots; fresh figures are
    # created and closed instead.
    fig1 = plt.figure()
    # BUG FIX: the original passed s=ite, tying marker AREA to the epoch
    # count and producing unreadably large markers for big epoch values.
    plt.scatter(x, y)
    plt.plot(x, w[0,1] + (w[0,0] * x), linestyle='solid')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('Plot of random data values with linear regression line')
    plt.savefig("plt1.png")
    plt.close(fig1)

    fig2 = plt.figure()
    plt.plot(jList)
    plt.xlabel('Number of Iterations')
    plt.ylabel('Loss Value')
    plt.title('Plot of loss values over number of iterations')
    plt.savefig("plt2.png")
    plt.close(fig2)

    # BUG FIX: guard against epoch == 0, where jList is empty and the
    # original raised IndexError; J is 0.0 in that case.
    final_loss = str(jList[-1]) if jList else str(J)
    return [finalFig.interactive(), lossFig.interactive(), "plt1.png", "plt2.png", final_loss, str(equation)]
231
+
232
# Build the Gradio UI: input controls on top, an Altair output row and a
# Matplotlib output row (only one visible at a time), plus textboxes showing
# the final loss value and the fitted equation.
with gr.Blocks(title="Regression Visualization") as demo:
    gr.Markdown(
    """
    # Regression Visualization for Machine Learning
    Choose your variables below to create a linear or logistic regression model!
    """)
    with gr.Row():
        # Which plotting backend's output row to show.
        pack = gr.Radio(label="Plot Package",info="Choose 'MatPlot' for MatPlotLib, Choose 'Altair' for Altair",
                        choices=['MatPlot','Altair'], value='Altair')
        # Which regression demo to run (dispatched by make_plot).
        bType = gr.Radio(label="Regression Type",info="Choose 'log' for logistic, Choose 'lin' for linear",
                         choices=['log','lin'], value='log')
        l_rate = gr.Number(value=0.01,label="Learning Rate",info="Enter a value in the range 0.0 - 1.0")
        # precision=0 coerces the value to an int, as the trainers expect.
        epochs = gr.Number(value=100,label="Number of Epochs (Number of Training Iterations)",info="Enter an integer larger than 0",precision=0)
        bStart = gr.Button(label="Start")
    # Altair outputs (visible by default, matching pack's default 'Altair').
    with gr.Row() as alt_row:
        altPlot1 = gr.Plot()
        altPlot2 = gr.Plot()
    # Matplotlib outputs: PNG files written by logReg/linReg, shown as images.
    with gr.Row(visible=False) as mat_row:
        matPlot1 = gr.Image(type='filepath',label="Regression Graph",height=600,width=600)
        matPlot2 = gr.Image(type='filepath',label="Regression Graph",height=600,width=600)
    loss = gr.Textbox(label="Final Loss Value")
    equ = gr.Textbox(label="Equation for Plotted Line")
    def changeComp(package):
        # Toggle which output row is visible based on the chosen plot package.
        # NOTE(review): gr.Row.update was removed in Gradio 4.x (use
        # gr.Row(visible=...) there) — this code assumes Gradio 3.x; confirm
        # against the pinned gradio version.
        if package == "Altair":
            return {
                alt_row: gr.Row.update(visible=True),
                mat_row: gr.Row.update(visible=False)
            }
        else:
            return {
                alt_row: gr.Row.update(visible=False),
                mat_row: gr.Row.update(visible=True)
            }

    # Swap output rows whenever the package radio changes.
    pack.input(changeComp, show_progress=True, inputs=[pack], outputs=[alt_row, mat_row])
    # Run the selected regression and fill all six outputs; make_plot returns
    # [altair fig, altair loss fig, png path, png path, loss str, equation str].
    bStart.click(make_plot, show_progress=True, inputs=[bType,l_rate,epochs], outputs=[altPlot1,altPlot2, matPlot1, matPlot2, loss, equ])
    demo.load()

if __name__== "__main__" :
    # queue() enables the progress tracking used by make_plot's workers.
    demo.queue().launch()