"""Gradio demo that fits a first-order Legendre-basis meta-model (HDMR/FEOM).

The user supplies a NumPy expression for ``y = f(x)``; the app samples ``x``
uniformly in ``[range_min, range_max]``, fits one set of Legendre
coefficients per input variable on the training split, reports first-order
sensitivity indices and R2 scores, and draws scatter plots for the training
and testing splits.
"""
import numpy as np
import gradio as gd
import pandas as pd
from sklearn.metrics import r2_score

# df_trn, df_hat_trn, df_tst and df_hat_tst are module-level globals that
# optimize() creates and change_pdp() reads. They do not exist until optimize()
# has run once.
# NOTE(review): this state is shared by every session of the app — confirm
# that is acceptable for the intended deployment.

# Default model expression shown in the code editor and in the first example.
DEFAULT_FUNC_CODE = (
    "np.sum((10*x-5)**2 - 10*np.cos(2*3.14*(10*x-5)),axis=1,keepdims=True)"
    " +10*x.shape[1]"
)


def Pn(m, x):
    """Evaluate the degree-``m`` Legendre polynomial at ``x``.

    Uses the standard three-term recurrence
    ``k*P_k = (2k-1)*x*P_{k-1} - (k-1)*P_{k-2}``, i.e. the polynomials that
    are orthogonal on [-1, 1]. The recurrence is run iteratively so the cost
    is linear in ``m`` (the recursive form re-evaluates lower degrees
    exponentially often).

    Args:
        m: Non-negative integer degree.
        x: Scalar or NumPy array of evaluation points.

    Returns:
        Array (or scalar) of the same shape as ``x``. For ``m == 1`` the
        input object itself is returned, not a copy.

    Raises:
        ValueError: If ``m`` is negative.
    """
    if m < 0:
        raise ValueError(f"Legendre degree must be non-negative, got {m}")
    if m == 0:
        return np.ones_like(x)
    if m == 1:
        return x
    previous, current = np.ones_like(x), x
    for k in range(2, m + 1):
        # Same arithmetic, in the same order, as the recursive definition.
        previous, current = (
            current,
            (2*k-1)*x*current/k - (k-1)*previous/k,
        )
    return current


def L(a, b, m, x):
    """Evaluate the degree-``m`` Legendre polynomial rescaled to ``[a, b]``.

    ``x`` is mapped affinely so that ``a -> -1`` and ``b -> 1``, and the
    result is multiplied by ``sqrt((2m+1)/(b-a))`` so that each basis
    function has unit L2 norm over ``[a, b]``.

    Args:
        a: Lower end of the interval.
        b: Upper end of the interval; must be greater than ``a``.
        m: Non-negative integer degree.
        x: Scalar or NumPy array of points in ``[a, b]``.

    Returns:
        The scaled basis function evaluated at ``x``.
    """
    return np.sqrt((2*m+1)/(b-a))*Pn(m, 2*(x-b)/(b-a)+1)


def sobol(x, y, m, range_min, range_max):
    """Estimate first-order expansion coefficients and sensitivity indices.

    Args:
        x: Sample inputs, 2-D array of shape ``(N, n)``.
        y: Model outputs for ``x``; the caller passes shape ``(N, 1)``.
        m: Number of Legendre basis functions per variable (degrees 1..m).
        range_min: Lower bound of the sampling interval.
        range_max: Upper bound of the sampling interval.

    Returns:
        Tuple ``(S_first_order, f0, alpha)`` where ``S_first_order`` has
        shape ``(n,)``, ``f0`` is the sample mean of ``y`` and ``alpha`` has
        shape ``(m, n)``.
    """
    print(x.shape, y.shape)
    N, n = x.shape
    width = range_max - range_min

    f0 = np.mean(y)
    print(f'f0:{f0}')

    # alpha[r, i] is a Monte Carlo estimate of the projection of (y - f0)
    # onto the (r+1)-th basis function of variable i.
    alpha = np.zeros((m, n))
    for r in range(m):
        for i in range(n):
            basis = L(range_min, range_max, r+1, np.array(x[:, [i]]))
            alpha[r, i] = width * np.mean((y-f0) * basis)

    # Total variance on the same scale as sum(alpha**2): width * Var(y).
    # The previous form, width*E[y^2] - (width*E[y])^2, only equals this when
    # width == 1 and could even be negative for other ranges.
    global_D = width * (np.mean(y ** 2) - np.mean(y) ** 2)

    D_first_order = np.sum(alpha ** 2, axis=0)
    # NOTE: a constant y gives global_D == 0, so the indices come out NaN.
    S_first_order = D_first_order / global_D

    return S_first_order, f0, alpha


def evalute_hdmr(x, f0, alpha, range_min, range_max):
    """Evaluate the fitted first-order meta-model at the points ``x``.

    Args:
        x: Inputs, 2-D array of shape ``(N, n)``.
        f0: Constant term of the expansion.
        alpha: Coefficients of shape ``(m, n)`` as returned by ``sobol``.
        range_min: Lower bound of the interval the basis was built on.
        range_max: Upper bound of the interval the basis was built on.

    Returns:
        Predictions as an array of shape ``(N, 1)``.
    """
    N, n = x.shape
    m, _ = alpha.shape

    y = f0 * np.ones((N, 1))
    for r in range(m):
        for i in range(n):
            y = y + alpha[r, i] * L(range_min, range_max, r+1, np.array(x[:, [i]]))

    return y


def f(x):
    """Sample model: sum of squared offsets from 0.5, shape ``(N, 1)``."""
    return np.sum((x-0.5)**2, axis=1, keepdims=True)


def optimize(N, n, m, func_code, trn_ratio, range_min, range_max):
    """Sample data, fit the meta-model and build all UI updates.

    Args:
        N: Total number of sample points (training plus testing).
        n: Number of input variables.
        m: Number of basis functions per variable.
        func_code: Python expression evaluated with ``x`` and ``np`` in
            scope; it must produce an array of shape ``(N, 1)``.
        trn_ratio: Fraction of the ``N`` points used for training.
        range_min: Lower bound of the uniform sampling interval.
        range_max: Upper bound of the uniform sampling interval.

    Returns:
        A 10-tuple matching the ``outputs`` wired to this handler: summary
        text, dropdown update, then scatter plots and an R2 label for the
        training split followed by the same four items for the testing split.

    Raises:
        ValueError: If ``range_min`` is not strictly less than ``range_max``.
    """
    global df_trn, df_hat_trn, df_tst, df_hat_tst

    # Sliders may deliver whole numbers as floats; range() and array shapes
    # need real integers.
    N, n, m = int(N), int(n), int(m)

    print(func_code)
    print(N)

    if range_min >= range_max:
        # Otherwise the basis normalisation yields NaN/inf and the failure
        # only surfaces later as an obscure error from r2_score.
        raise ValueError(
            f"range_min ({range_min}) must be less than range_max ({range_max})"
        )

    N_trn = round(N*trn_ratio)
    N_tst = N - N_trn

    print(f'N:{N} N_trn:{N_trn} N_tst:{N_tst}')

    # SECURITY: func_code comes straight from the UI and is passed to eval(),
    # which allows arbitrary code execution. Only expose this app to trusted
    # users, or replace eval with a restricted expression parser.
    x_trn = np.random.uniform(range_min, range_max, size=(N_trn, n))
    y_trn = eval(func_code, {'x': x_trn, 'np': np})

    x_tst = np.random.uniform(range_min, range_max, size=(N_tst, n))
    y_tst = eval(func_code, {'x': x_tst, 'np': np})

    si, f0, alpha = sobol(x_trn, y_trn, m, range_min, range_max)
    yhat_trn = evalute_hdmr(x_trn, f0, alpha, range_min, range_max)
    yhat_tst = evalute_hdmr(x_tst, f0, alpha, range_min, range_max)

    out = ''
    out += f'trn x:{x_trn.shape} y:{y_trn.shape}'
    out += f'tst x:{x_tst.shape} y:{y_tst.shape}'
    out += f'{si}'

    # Input/Output data concatenated
    xy_trn = np.concatenate([x_trn, y_trn], axis=1)
    xyhat_trn = np.concatenate([x_trn, yhat_trn], axis=1)
    y_vs_yhat_trn = np.concatenate([y_trn, yhat_trn], axis=1)
    r2_trn = r2_score(y_trn, yhat_trn)

    xy_tst = np.concatenate([x_tst, y_tst], axis=1)
    xyhat_tst = np.concatenate([x_tst, yhat_tst], axis=1)
    y_vs_yhat_tst = np.concatenate([y_tst, yhat_tst], axis=1)
    r2_tst = r2_score(y_tst, yhat_tst)

    # x1, x2, ... , xn, y
    x_names = [f'x{idx+1}' for idx in range(n)]
    columns = x_names + ['y']
    print(columns)

    df_trn = pd.DataFrame(xy_trn, columns=columns)
    df_hat_trn = pd.DataFrame(xyhat_trn, columns=columns)
    df_corr_trn = pd.DataFrame(y_vs_yhat_trn, columns=['y', 'yhat'])

    df_tst = pd.DataFrame(xy_tst, columns=columns)
    df_hat_tst = pd.DataFrame(xyhat_tst, columns=columns)
    df_corr_tst = pd.DataFrame(y_vs_yhat_tst, columns=['y', 'yhat'])

    return (out,
            gd.Dropdown.update(choices=x_names),
            gd.ScatterPlot.update(value=df_trn, x='x1', y='y'),
            gd.ScatterPlot.update(value=df_hat_trn, x='x1', y='y'),
            gd.ScatterPlot.update(value=df_corr_trn, x='y', y='yhat'),
            gd.Markdown.update(value=f'R2:{r2_trn}'),
            gd.ScatterPlot.update(value=df_tst, x='x1', y='y'),
            gd.ScatterPlot.update(value=df_hat_tst, x='x1', y='y'),
            gd.ScatterPlot.update(value=df_corr_tst, x='y', y='yhat'),
            gd.Markdown.update(value=f'R2:{r2_tst}'))


def change_pdp(pdp_x):
    """Redraw the four data/model scatter plots against the chosen input.

    Args:
        pdp_x: Column name selected in the dropdown (e.g. ``'x1'``).

    Returns:
        Four scatter-plot updates: training data, training approximation,
        testing data, testing approximation.
    """
    print(pdp_x, type(pdp_x))
    # Reads the data frames stored by the most recent optimize() call.
    return (gd.ScatterPlot.update(value=df_trn, x=pdp_x, y='y'),
            gd.ScatterPlot.update(value=df_hat_trn, x=pdp_x, y='y'),
            gd.ScatterPlot.update(value=df_tst, x=pdp_x, y='y'),
            gd.ScatterPlot.update(value=df_hat_tst, x=pdp_x, y='y'))


# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened copy of this file — confirm it matches the intended layout.
with gd.Blocks() as demo:
    with gd.Row():
        with gd.Column():
            N = gd.Slider(
                10, 5000, value=1000, step=100,
                label="Number of samples",
                info=""" Please specify how many sample points you want to generate to generate meta-model. """)
            n = gd.Slider(
                2, 10, step=1,
                label="Number of variables",
                info=""" Please specify how many variables you have in the optimized function. """)
            m = gd.Slider(
                1, 20, step=1, value=3,
                label="Number of bases",
                info=""" Please specify how many base functions you use to construct the meta-model. """)
            trn_ratio = gd.Slider(0.1, 0.9, value=0.2, step=0.1, label="Training ratio")
            range_min = gd.Slider(
                -10.0, 10.0, step=0.01, value=-5,
                label="Minimum value of the range",
                info=""" Please choose the lowest value of the range. Make sure the range_min is less than range_max. """)
            range_max = gd.Slider(
                -10.0, 10.0, step=0.01, value=5,
                label="Maximum value of the range",
                info=""" Please choose the highest value of the range. Make sure the range_max is greate than range_min. """)

        with gd.Column():
            func_disp = gd.Markdown()
            func_code = gd.Code(
                value=DEFAULT_FUNC_CODE,
                language="python",
                lines=1,
                label="Model function: input is x with shape (N,n) and output is y with shape (N,1)",
                interactive=True
            )
            btn = gd.Button('Create FEOM')
            # Raw strings: the LaTeX backslashes are not Python escapes.
            gd.Examples(
                [[DEFAULT_FUNC_CODE,
                  r"$$\mathbf{y} = 10n+\sum_{i=1}^n[\mathbf{x}_i^2-10 \cos(2 \pi x_i)] \;\; \mathbf{x}_i \in R^{N}$$",
                  0, 1],
                 ["np.sum((x-0.5)**2,axis=1,keepdims=True)",
                  r"$$\mathbf{y} = \sum_i^n(\mathbf{x}_i-0.5)^2,\;\; \mathbf{x}_i \in R^{N}$$",
                  0, 1],
                 ["np.sum(x,axis=1,keepdims=True)",
                  r"$$\mathbf{y} = \sum_i^n \mathbf{x}_i \;\; \mathbf{x}_i \in R^{N}$$",
                  0, 1],
                 ["np.sum((x-0.5)**3,axis=1,keepdims=True)",
                  r"$$\mathbf{y} = \sum_i^n (\mathbf{x}_i-0.5)^3 \;\; \mathbf{x}_i \in R^{N}$$",
                  0, 1]],
                inputs=[func_code, func_disp, range_min, range_max])

    with gd.Row():
        gd.Markdown('Original Training Data')
        gd.Markdown('HDMR Approximation applied to Training Data')
        r2_out_trn = gd.Markdown('R2 Score on Training')
    with gd.Row():
        pdp_trn = gd.ScatterPlot(label='Scatter plot of input data').style(container=True)
        feom_trn = gd.ScatterPlot(label='Scatter plot of FEOM').style(container=True)
        corr_trn = gd.ScatterPlot(label='y versus feom')
    with gd.Row():
        gd.Markdown('Original Testing Data')
        gd.Markdown('HDMR Approximation applied to Testing Data')
        r2_out_tst = gd.Markdown('R2 Score on Testing')
    with gd.Row():
        pdp_tst = gd.ScatterPlot(label='Scatter plot of input data').style(container=True)
        feom_tst = gd.ScatterPlot(label='Scatter plot of FEOM').style(container=True)
        corr_tst = gd.ScatterPlot(label='y versus feom')

    pdp_x = gd.Dropdown(['x1', 'x2'], label="Choose input variable to show in scatter plots")
    out = gd.TextArea()

    # Every trigger re-runs the full fit with the same inputs and outputs.
    fit_inputs = [N, n, m, func_code, trn_ratio, range_min, range_max]
    fit_outputs = [out, pdp_x,
                   pdp_trn, feom_trn, corr_trn, r2_out_trn,
                   pdp_tst, feom_tst, corr_tst, r2_out_tst]

    btn.click(optimize, inputs=fit_inputs, outputs=fit_outputs)
    for control in (n, N, m, trn_ratio, range_min, range_max):
        control.change(optimize, inputs=fit_inputs, outputs=fit_outputs)

    pdp_x.change(change_pdp, inputs=pdp_x, outputs=[pdp_trn, feom_trn, pdp_tst, feom_tst])

demo.launch()