Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import gradio as gd | |
| import pandas as pd | |
| from sklearn.metrics import r2_score | |
# Most recent data frames produced by optimize(); read back by change_pdp().
# A module-level `global` statement binds nothing, so the names are
# initialised explicitly: None means "no meta-model has been built yet".
df_trn = None
df_hat_trn = None
df_tst = None
df_hat_tst = None
# Standard (unnormalised) Legendre polynomial P_m, natural domain [-1, 1]
def Pn(m, x):
    """Evaluate the Legendre polynomial of degree ``m`` at ``x``.

    Uses the three-term recurrence
    ``k*P_k(x) = (2k-1)*x*P_{k-1}(x) - (k-1)*P_{k-2}(x)`` iteratively, so the
    cost is linear in ``m`` (a doubly-recursive formulation is exponential).

    Args:
        m: Non-negative integer degree.
        x: Scalar or NumPy array of evaluation points.

    Returns:
        ``P_m(x)`` with the same shape as ``x``. For ``m == 1`` the input
        object itself is returned (no copy).

    Raises:
        ValueError: If ``m`` is negative or not integer-valued.
    """
    if m < 0 or m != int(m):
        raise ValueError(f'Legendre degree must be a non-negative integer, got {m}')
    if m == 0:
        return np.ones_like(x)
    if m == 1:
        return x
    # previous = P_{k-2}, current = P_{k-1}; advance one degree per iteration.
    previous, current = np.ones_like(x), x
    for k in range(2, int(m) + 1):
        previous, current = current, (2*k - 1)*x*current/k - (k - 1)*previous/k
    return current
# Legendre basis function rescaled to the interval [a, b]
def L(a, b, m, x):
    """Evaluate the degree-``m`` Legendre basis function shifted to ``[a, b]``.

    ``x`` is mapped affinely so that ``a -> -1`` and ``b -> +1``, ``Pn`` is
    evaluated there, and the result is scaled by ``sqrt((2m+1)/(b-a))`` (the
    factor that gives the basis unit L2 norm on ``[a, b]``).

    Args:
        a: Lower end of the interval.
        b: Upper end of the interval (must differ from ``a``).
        m: Polynomial degree.
        x: Scalar or array of points, expected to lie in ``[a, b]``.

    Returns:
        Basis function values with the same shape as ``x``.
    """
    width = b - a
    scale = np.sqrt((2*m + 1)/width)
    mapped = 2*(x - b)/width + 1
    return scale*Pn(m, mapped)
def sobol(x, y, m, range_min, range_max):
    """Estimate first-order Sobol indices via a Legendre (HDMR) expansion.

    Each first-order component function is projected onto the first ``m``
    non-constant basis functions ``L(range_min, range_max, r, .)`` using
    Monte Carlo averages over the samples.

    Args:
        x: Sample matrix of shape ``(N, n)``; samples are assumed to be drawn
            uniformly from ``[range_min, range_max]`` in every column.
        y: Model outputs, expected as a column of shape ``(N, 1)``.
        m: Number of basis functions (degrees ``1..m``) per variable.
        range_min: Lower bound of the sampling interval.
        range_max: Upper bound of the sampling interval.

    Returns:
        Tuple ``(S_first_order, f0, alpha)``: the ``(n,)`` array of
        first-order sensitivity indices, the sample mean of ``y`` and the
        ``(m, n)`` coefficient matrix.

    Note:
        If ``y`` is constant the total variance is zero and the indices are
        NaN/inf (NumPy division semantics), as there is nothing to attribute.
    """
    print(x.shape, y.shape)
    _, n = x.shape
    f0 = np.mean(y)
    print(f'f0:{f0}')

    width = range_max - range_min
    residual = y - f0  # loop-invariant, computed once

    alpha = np.zeros((m, n))
    for r in range(m):
        for i in range(n):
            basis = L(range_min, range_max, r+1, np.array(x[:, [i]]))
            alpha[r, i] = width * np.mean(residual * basis)

    # With an orthonormal basis on [range_min, range_max] the partial
    # variance of variable k is sum_r(alpha[r, k]**2) / width, so the matching
    # denominator is width * Var(y). The earlier form
    # width*E[y^2] - (width*E[y])**2 only agreed with this when width == 1.
    global_D = width * np.var(y)
    D_first_order = np.sum(alpha ** 2, axis=0)
    S_first_order = D_first_order / global_D
    return S_first_order, f0, alpha
def evalute_hdmr(x, f0, alpha, range_min, range_max):
    """Evaluate the first-order HDMR meta-model at the rows of ``x``.

    The prediction is ``f0`` plus, for every variable ``i`` and every basis
    index ``r``, ``alpha[r, i]`` times the degree ``r+1`` basis function
    ``L(range_min, range_max, r+1, .)`` applied to column ``i`` of ``x``.

    Args:
        x: Sample matrix of shape ``(N, n)``.
        f0: Constant (zeroth-order) term.
        alpha: Coefficient matrix whose first axis indexes the basis degree
            and second axis the variable.
        range_min: Lower bound of the interval the basis is defined on.
        range_max: Upper bound of the interval the basis is defined on.

    Returns:
        Array of shape ``(N, 1)`` with the meta-model predictions.
    """
    n_samples, n_vars = x.shape
    n_bases, _ = alpha.shape
    prediction = f0 * np.ones((n_samples, 1))
    # np.ndindex walks (degree, variable) pairs in row-major order, i.e.
    # degree in the outer position and variable in the inner one.
    for degree_idx, var_idx in np.ndindex(n_bases, n_vars):
        column = np.array(x[:, [var_idx]])
        term = alpha[degree_idx, var_idx] * L(range_min, range_max, degree_idx+1, column)
        prediction = prediction + term
    return prediction
def f(x):
    """Sample model: row-wise sum of squared distances from 0.5.

    Args:
        x: Array of shape ``(N, n)``.

    Returns:
        Array of shape ``(N, 1)`` holding ``sum_i (x[:, i] - 0.5)**2``.
    """
    offset = x - 0.5
    squared = offset**2
    return np.sum(squared, axis=1, keepdims=True)
def _evaluate_model(func_code, x):
    """Evaluate the user-supplied expression ``func_code`` on samples ``x``.

    The expression sees exactly two names: ``x`` (the sample matrix) and
    ``np`` (NumPy). A 1-D result is reshaped to a column so that a model
    written without ``keepdims=True`` still works.

    Raises:
        ValueError: If the result cannot be interpreted as an ``(N, 1)`` column.
    """
    # SECURITY: eval() runs arbitrary Python typed into the web UI. Restricting
    # the globals to {'x', 'np'} is NOT a sandbox (builtins are still
    # reachable). Only expose this app to trusted users, or replace this with
    # a real expression parser.
    y = np.asarray(eval(func_code, {'x': x, 'np': np}))
    if y.ndim == 1:
        y = y.reshape(-1, 1)
    if y.shape != (x.shape[0], 1):
        raise ValueError(
            f'Model function must return shape ({x.shape[0]}, 1), got {y.shape}')
    return y


def optimize(N, n, m, func_code, trn_ratio, range_min, range_max):
    """Sample the model, fit the first-order HDMR meta-model and build UI updates.

    Draws uniform training and test samples on ``[range_min, range_max]^n``,
    evaluates the user model on both, fits the expansion on the training set
    with ``sobol`` and predicts both sets with ``evalute_hdmr``. The resulting
    data frames are also stored in the module globals ``df_trn``,
    ``df_hat_trn``, ``df_tst`` and ``df_hat_tst`` for ``change_pdp``.

    Args:
        N: Total number of samples (training + test).
        n: Number of input variables.
        m: Number of basis functions per variable.
        func_code: Python expression mapping ``x`` of shape ``(N, n)`` to ``y``
            of shape ``(N, 1)``.
        trn_ratio: Fraction of ``N`` used for training.
        range_min: Lower bound of the sampling interval.
        range_max: Upper bound of the sampling interval.

    Returns:
        A 10-tuple matching the event outputs: summary text, dropdown update,
        then for training and for test: data scatter, meta-model scatter,
        y-vs-yhat scatter and an R2 markdown update.

    Raises:
        ValueError: If the range is empty/inverted, a split would be empty, or
            the model output has the wrong shape.
    """
    global df_trn, df_hat_trn, df_tst, df_hat_tst

    # Sliders may deliver floats; these three are used as sizes and ranges.
    N, n, m = int(N), int(n), int(m)
    if range_min >= range_max:
        # Otherwise the basis scaling divides by zero / takes sqrt of a negative.
        raise ValueError(
            f'range_min ({range_min}) must be less than range_max ({range_max})')

    print(func_code)
    print(N)
    N_trn = round(N*trn_ratio)
    N_tst = N - N_trn
    print(f'N:{N} N_trn:{N_trn} N_tst:{N_tst}')
    if N_trn < 1 or N_tst < 1:
        raise ValueError('Both the training and the test split need at least one sample')

    x_trn = np.random.uniform(range_min, range_max, size=(N_trn, n))
    y_trn = _evaluate_model(func_code, x_trn)
    x_tst = np.random.uniform(range_min, range_max, size=(N_tst, n))
    y_tst = _evaluate_model(func_code, x_tst)

    si, f0, alpha = sobol(x_trn, y_trn, m, range_min, range_max)
    yhat_trn = evalute_hdmr(x_trn, f0, alpha, range_min, range_max)
    yhat_tst = evalute_hdmr(x_tst, f0, alpha, range_min, range_max)

    out = ''
    out += f'trn x:{x_trn.shape} y:{y_trn.shape}'
    out += f'tst x:{x_tst.shape} y:{y_tst.shape}'
    out += f'{si}'

    r2_trn = r2_score(y_trn, yhat_trn)
    r2_tst = r2_score(y_tst, yhat_tst)

    # Column layout of the data frames: x1, x2, ... , xn, y
    feature_names = [f'x{k+1}' for k in range(n)]
    columns = feature_names + ['y']
    print(columns)

    df_trn = pd.DataFrame(np.concatenate([x_trn, y_trn], axis=1), columns=columns)
    df_hat_trn = pd.DataFrame(np.concatenate([x_trn, yhat_trn], axis=1), columns=columns)
    df_corr_trn = pd.DataFrame(np.concatenate([y_trn, yhat_trn], axis=1), columns=['y', 'yhat'])
    df_tst = pd.DataFrame(np.concatenate([x_tst, y_tst], axis=1), columns=columns)
    df_hat_tst = pd.DataFrame(np.concatenate([x_tst, yhat_tst], axis=1), columns=columns)
    df_corr_tst = pd.DataFrame(np.concatenate([y_tst, yhat_tst], axis=1), columns=['y', 'yhat'])

    return (out,
            gd.Dropdown.update(choices=feature_names),
            gd.ScatterPlot.update(value=df_trn, x='x1', y='y'),
            gd.ScatterPlot.update(value=df_hat_trn, x='x1', y='y'),
            gd.ScatterPlot.update(value=df_corr_trn, x='y', y='yhat'),
            gd.Markdown.update(value=f'R2:{r2_trn}'),
            gd.ScatterPlot.update(value=df_tst, x='x1', y='y'),
            gd.ScatterPlot.update(value=df_hat_tst, x='x1', y='y'),
            gd.ScatterPlot.update(value=df_corr_tst, x='y', y='yhat'),
            gd.Markdown.update(value=f'R2:{r2_tst}'))
def change_pdp(pdp_x):
    """Re-plot the four data/meta-model scatter plots against variable ``pdp_x``.

    Reads the data frames that ``optimize`` stores in the module globals
    ``df_trn``, ``df_hat_trn``, ``df_tst`` and ``df_hat_tst``.

    Args:
        pdp_x: Column name chosen in the dropdown (e.g. ``'x1'``).

    Returns:
        A 4-tuple of component updates, in the order training data, training
        meta-model, test data, test meta-model. If no data has been generated
        yet, or ``pdp_x`` is not a column of the current data (e.g. a stale
        selection after the number of variables was reduced), every element
        is a no-op update so the plots are left as they are.
    """
    print(pdp_x, type(pdp_x))
    # Looked up by name so that a dropdown change arriving before the first
    # optimize() run does not raise NameError.
    frames = [globals().get(name)
              for name in ('df_trn', 'df_hat_trn', 'df_tst', 'df_hat_tst')]
    if any(frame is None or pdp_x not in frame.columns for frame in frames):
        return tuple(gd.update() for _ in frames)
    return tuple(gd.ScatterPlot.update(value=frame, x=pdp_x, y='y')
                 for frame in frames)
# Gradio UI: controls on the left, model definition on the right, then
# training/test plots, a variable selector and a text summary.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a listing that had lost its indentation -- confirm against the deployed app.
with gd.Blocks() as demo:
    with gd.Row():
        with gd.Column():
            N = gd.Slider(10, 5000, value=1000, step=100, label="Number of samples",
                          info="""
                          Please specify how many sample points you want to
                          generate to generate meta-model.
                          """)
            n = gd.Slider(2, 10, step=1, label="Number of variables",
                          info="""
                          Please specify how many variables you have
                          in the optimized function.
                          """)
            m = gd.Slider(1, 20, step=1, value=3, label="Number of bases",
                          info="""
                          Please specify how many base functions you use
                          to construct the meta-model.
                          """)
            trn_ratio = gd.Slider(0.1, 0.9, value=0.2, step=0.1, label="Training ratio")
            range_min = gd.Slider(-10.0, 10.0, step=0.01, value=-5, label="Minimum value of the range",
                                  info="""
                                  Please choose the lowest value of the range. Make sure the range_min is less than
                                  range_max.
                                  """)
            range_max = gd.Slider(-10.0, 10.0, step=0.01, value=5, label="Maximum value of the range",
                                  info="""
                                  Please choose the highest value of the range. Make sure the range_max is greater than
                                  range_min.
                                  """)
        with gd.Column():
            func_disp = gd.Markdown()
            func_code = gd.Code(
                value="np.sum((10*x-5)**2 - 10*np.cos(2*3.14*(10*x-5)),axis=1,keepdims=True) +10*x.shape[1]",
                language="python",
                lines=1,
                label="Model function: input is x with shape (N,n) and output is y with shape (N,1)",
                interactive=True
            )
            btn = gd.Button('Create FEOM')
            # Each example row is [model expression, LaTeX description, range_min, range_max].
            # The LaTeX strings are raw literals so the backslashes are not
            # parsed as (invalid) escape sequences; their values are unchanged.
            gd.Examples([["np.sum((10*x-5)**2 - 10*np.cos(2*3.14*(10*x-5)),axis=1,keepdims=True) +10*x.shape[1]",
                          r"$$\mathbf{y} = 10n+\sum_{i=1}^n[\mathbf{x}_i^2-10 \cos(2 \pi x_i)] \;\; \mathbf{x}_i \in R^{N}$$",
                          0, 1],
                         ["np.sum((x-0.5)**2,axis=1,keepdims=True)",
                          r"$$\mathbf{y} = \sum_i^n(\mathbf{x}_i-0.5)^2,\;\; \mathbf{x}_i \in R^{N}$$",
                          0, 1],
                         ["np.sum(x,axis=1,keepdims=True)",
                          r"$$\mathbf{y} = \sum_i^n \mathbf{x}_i \;\; \mathbf{x}_i \in R^{N}$$",
                          0, 1],
                         ["np.sum((x-0.5)**3,axis=1,keepdims=True)",
                          r"$$\mathbf{y} = \sum_i^n (\mathbf{x}_i-0.5)^3 \;\; \mathbf{x}_i \in R^{N}$$",
                          0, 1]],
                        inputs=[func_code, func_disp, range_min, range_max])
    with gd.Row():
        gd.Markdown('Original Training Data')
        gd.Markdown('HDMR Approximation applied to Training Data')
        r2_out_trn = gd.Markdown('R2 Score on Training')
    with gd.Row():
        pdp_trn = gd.ScatterPlot(label='Scatter plot of input data').style(container=True)
        feom_trn = gd.ScatterPlot(label='Scatter plot of FEOM').style(container=True)
        corr_trn = gd.ScatterPlot(label='y versus feom')
    with gd.Row():
        gd.Markdown('Original Testing Data')
        gd.Markdown('HDMR Approximation applied to Testing Data')
        r2_out_tst = gd.Markdown('R2 Score on Testing')
    with gd.Row():
        pdp_tst = gd.ScatterPlot(label='Scatter plot of input data').style(container=True)
        feom_tst = gd.ScatterPlot(label='Scatter plot of FEOM').style(container=True)
        corr_tst = gd.ScatterPlot(label='y versus feom')
    pdp_x = gd.Dropdown(['x1', 'x2'], label="Choose input variable to show in scatter plots")
    out = gd.TextArea()

    # Every trigger re-runs optimize() with the same inputs and outputs, so the
    # lists are defined once instead of being repeated per event.
    optimize_inputs = [N, n, m, func_code, trn_ratio, range_min, range_max]
    optimize_outputs = [out, pdp_x, pdp_trn, feom_trn, corr_trn, r2_out_trn,
                        pdp_tst, feom_tst, corr_tst, r2_out_tst]
    btn.click(optimize, inputs=optimize_inputs, outputs=optimize_outputs)
    for control in (n, N, m, trn_ratio, range_min, range_max):
        control.change(optimize, inputs=optimize_inputs, outputs=optimize_outputs)

    # Changing the plotted variable only redraws the four x-vs-y scatter plots.
    pdp_x.change(change_pdp, inputs=pdp_x, outputs=[pdp_trn, feom_trn, pdp_tst, feom_tst])
demo.launch()