# hdmr / app.py
# hkayabilisim's picture
# Live update enabled for range sliders
# c6bb2d8
import numpy as np
import gradio as gd
import pandas as pd
from sklearn.metrics import r2_score
# State shared between the `optimize` and `change_pdp` callbacks: `optimize`
# rebinds these four DataFrames through its `global` declaration and
# `change_pdp` reads them. A `global` statement at module level is a no-op, so
# the names are bound explicitly here; they hold None until the first run.
df_trn = df_hat_trn = df_tst = df_hat_tst = None
# Standard (un-normalized) Legendre polynomial P_m, defined on [-1, 1]
def Pn(m, x):
    """Evaluate the Legendre polynomial of degree ``m`` at ``x``.

    The value is built bottom-up with the three-term recurrence
    ``P_k(x) = (2k-1)*x*P_{k-1}(x)/k - (k-1)*P_{k-2}(x)/k`` starting from
    ``P_0 = 1`` and ``P_1 = x``. Iterating keeps the cost linear in ``m``;
    the equivalent doubly-recursive formulation re-evaluates the same lower
    degrees over and over and grows exponentially with ``m``.

    Args:
        m: Non-negative integer degree (an integral float such as ``2.0`` is
            accepted).
        x: Scalar or NumPy array of evaluation points.

    Returns:
        ``P_m(x)`` with the same shape as ``x``. For ``m == 1`` the input
        object itself is returned, not a copy.

    Raises:
        ValueError: If ``m`` is negative or not an integer value.
    """
    if m < 0 or m != int(m):
        raise ValueError(f'Legendre degree must be a non-negative integer, got {m!r}')
    m = int(m)
    if m == 0:
        return np.ones_like(x)
    previous, current = np.ones_like(x), x
    for k in range(2, m + 1):
        # Same arithmetic form as the textbook recurrence so results are
        # numerically identical to the recursive definition.
        previous, current = current, (2*k-1)*x*current/k - (k-1)*previous/k
    return current
# Scaled Legendre basis function on the interval [a, b]
def L(a, b, m, x):
    """Evaluate the degree-``m`` Legendre basis function shifted to ``[a, b]``.

    ``x`` is mapped affinely so that ``a -> -1`` and ``b -> +1``, passed to
    ``Pn``, and the result is multiplied by ``sqrt((2m+1)/(b-a))``.

    Args:
        a: Lower end of the interval.
        b: Upper end of the interval; must differ from ``a`` because the
            width ``b - a`` is used as a divisor.
        m: Polynomial degree forwarded to ``Pn``.
        x: Scalar or array of points.

    Returns:
        The scaled polynomial values, same shape as ``x``.
    """
    width = b - a
    scale = np.sqrt((2*m+1)/width)
    mapped = 2*(x-b)/width + 1
    return scale * Pn(m, mapped)
def sobol(x, y, m, range_min, range_max):
    """Fit a first-order HDMR expansion and estimate first-order Sobol indices.

    Each input column is expanded on the basis functions ``L(range_min,
    range_max, r, .)`` for ``r = 1..m``. The coefficient of basis ``r`` for
    variable ``i`` is the sample estimate of the integral of
    ``(y - f0) * L_r(x_i)`` over ``[range_min, range_max]``, i.e. the interval
    width times the sample mean (samples are assumed uniform on that interval).

    Args:
        x: Array of shape ``(N, n)`` with the input samples.
        y: Model outputs; expected to be a column of shape ``(N, 1)`` so it
            multiplies element-wise with each ``(N, 1)`` input column.
        m: Number of basis functions per variable.
        range_min: Lower bound of the sampling interval.
        range_max: Upper bound of the sampling interval.

    Returns:
        Tuple ``(S_first_order, f0, alpha)``: the ``(n,)`` array of first-order
        sensitivity indices, the mean of ``y``, and the ``(m, n)`` coefficient
        matrix.
    """
    print(x.shape, y.shape)
    _, n = x.shape
    width = range_max - range_min
    f0 = np.mean(y)
    print(f'f0:{f0}')
    residual = y - f0
    alpha = np.zeros((m, n))
    for i in range(n):
        column = x[:, [i]]  # keep 2-D so it lines up with the (N, 1) output
        for r in range(m):
            alpha[r, i] = width * np.mean(residual * L(range_min, range_max, r+1, column))
    # The basis is normalized with respect to dx on [range_min, range_max], so
    # sum_r alpha[r, i]**2 equals width * Var(f_i). The total variance must be
    # scaled by the same single factor of `width`; squaring the width on the
    # mean term (width*E[y^2] - (width*E[y])^2) is only right when width == 1.
    global_D = width * np.var(y)
    D_first_order = np.sum(alpha ** 2, axis=0)
    S_first_order = D_first_order / global_D
    return S_first_order, f0, alpha
def evalute_hdmr(x, f0, alpha, range_min, range_max):
    """Evaluate the first-order HDMR surrogate at the rows of ``x``.

    The prediction starts from the constant ``f0`` and adds, for every basis
    index ``r`` and variable ``i``, the term
    ``alpha[r, i] * L(range_min, range_max, r+1, x[:, i])``.

    Args:
        x: Array of shape ``(N, n)`` with the evaluation points.
        f0: Constant term of the expansion.
        alpha: 2-D coefficient matrix; row ``r`` holds the coefficients of
            basis function ``r+1``, column ``i`` those of variable ``i``.
        range_min: Lower bound passed to ``L``.
        range_max: Upper bound passed to ``L``.

    Returns:
        Array of shape ``(N, 1)`` with the surrogate predictions.
    """
    n_samples, n_vars = x.shape
    n_bases, _ = alpha.shape
    prediction = f0 * np.ones((n_samples, 1))
    # np.ndindex walks (r, i) with i varying fastest, i.e. the same order as
    # a nested "for r: for i:" loop.
    for r, i in np.ndindex(n_bases, n_vars):
        column = np.array(x[:, [i]])
        prediction = prediction + alpha[r, i] * L(range_min, range_max, r+1, column)
    return prediction
def f(x):
    """Sample model: row-wise sum of squared distances from 0.5.

    Args:
        x: Array of shape ``(N, n)``.

    Returns:
        Array of shape ``(N, 1)`` holding ``sum_i (x[:, i] - 0.5)**2``.
    """
    centered = x - 0.5
    squared = centered * centered
    return np.sum(squared, axis=1, keepdims=True)
def _eval_model(func_code, x):
    """Evaluate the user-supplied expression ``func_code`` on the samples ``x``.

    The expression sees exactly two names, ``x`` and ``np``. A 1-D result of
    length ``N`` is reshaped to ``(N, 1)``; any other shape is rejected.

    Raises:
        gd.Error: If the expression fails or yields an unexpected shape.
    """
    # SECURITY: eval() executes arbitrary Python typed into the UI. This is
    # only acceptable when everyone who can reach the app is trusted (or the
    # process is sandboxed); it is flagged here rather than silently replaced.
    try:
        y = np.asarray(eval(func_code, {'x': x, 'np': np}))
    except Exception as err:  # boundary with user-written code
        raise gd.Error(f'Model function failed: {err}') from err
    n_rows = x.shape[0]
    if y.shape == (n_rows,):
        y = y.reshape(n_rows, 1)
    if y.shape != (n_rows, 1):
        raise gd.Error(f'Model function must return shape ({n_rows}, 1), got {y.shape}')
    return y


def optimize(N,n,m,func_code,trn_ratio,range_min,range_max):
    """Sample the model, fit the HDMR surrogate and build the UI updates.

    ``N`` points are split into training and testing sets according to
    ``trn_ratio``; both are drawn uniformly from ``[range_min, range_max]``.
    The surrogate is fitted on the training set with ``sobol`` and evaluated on
    both sets with ``evalute_hdmr``.

    Side effects: rebinds the module-level ``df_trn``, ``df_hat_trn``,
    ``df_tst`` and ``df_hat_tst`` DataFrames. NOTE: being module globals, they
    are shared by everything running in this process.

    Args:
        N: Total number of samples.
        n: Number of input variables.
        m: Number of basis functions per variable.
        func_code: Python expression mapping ``x`` (shape ``(N, n)``) to ``y``.
        trn_ratio: Fraction of ``N`` used for training.
        range_min: Lower bound of the sampling interval.
        range_max: Upper bound of the sampling interval.

    Returns:
        A 10-tuple matching the event outputs: report text, dropdown update,
        then for training and for testing: data plot, surrogate plot,
        y-vs-yhat plot and an R2 markdown update.

    Raises:
        gd.Error: On an empty/inverted range, an empty split, or a failing
            model expression.
    """
    global df_trn, df_hat_trn, df_tst, df_hat_tst
    # Sizes are used as array dimensions, which must be true integers.
    N, n, m = int(N), int(n), int(m)
    if range_min >= range_max:
        # An empty interval makes the basis normalization divide by zero.
        raise gd.Error('range_min must be strictly less than range_max.')
    print(func_code)
    print(N)
    N_trn = round(N*trn_ratio)
    N_tst = N - N_trn
    print(f'N:{N} N_trn:{N_trn} N_tst:{N_tst}')
    if N_trn < 1 or N_tst < 1:
        raise gd.Error('Both the training and the testing set need at least one sample.')

    x_trn = np.random.uniform(range_min,range_max,size=(N_trn,n))
    y_trn = _eval_model(func_code, x_trn)
    x_tst = np.random.uniform(range_min,range_max,size=(N_tst,n))
    y_tst = _eval_model(func_code, x_tst)

    si, f0, alpha = sobol(x_trn, y_trn, m, range_min, range_max)
    yhat_trn = evalute_hdmr(x_trn, f0, alpha, range_min, range_max)
    yhat_tst = evalute_hdmr(x_tst, f0, alpha, range_min, range_max)

    # One item per line; without separators the pieces run into each other.
    out = '\n'.join([
        f'trn x:{x_trn.shape} y:{y_trn.shape}',
        f'tst x:{x_tst.shape} y:{y_tst.shape}',
        f'{si}',
    ])

    # Input/Output data concatenated
    xy_trn = np.concatenate([x_trn,y_trn],axis=1)
    xyhat_trn = np.concatenate([x_trn,yhat_trn],axis=1)
    y_vs_yhat_trn = np.concatenate([y_trn,yhat_trn],axis=1)
    r2_trn = r2_score(y_trn, yhat_trn)

    xy_tst = np.concatenate([x_tst,y_tst],axis=1)
    xyhat_tst = np.concatenate([x_tst,yhat_tst],axis=1)
    y_vs_yhat_tst = np.concatenate([y_tst,yhat_tst],axis=1)
    r2_tst = r2_score(y_tst, yhat_tst)

    # x1, x2, ... , xn, y
    x_names = [f'x{k+1}' for k in range(n)]
    columns = x_names + ['y']
    print(columns)

    df_trn = pd.DataFrame(xy_trn, columns=columns)
    df_hat_trn = pd.DataFrame(xyhat_trn, columns=columns)
    df_corr_trn = pd.DataFrame(y_vs_yhat_trn, columns=['y','yhat'])

    df_tst = pd.DataFrame(xy_tst, columns=columns)
    df_hat_tst = pd.DataFrame(xyhat_tst, columns=columns)
    df_corr_tst = pd.DataFrame(y_vs_yhat_tst, columns=['y','yhat'])

    return (out,
            gd.Dropdown.update(choices=x_names),
            gd.ScatterPlot.update(value=df_trn,x='x1',y='y'),
            gd.ScatterPlot.update(value=df_hat_trn,x='x1',y='y'),
            gd.ScatterPlot.update(value=df_corr_trn,x='y',y='yhat'),
            gd.Markdown.update(value=f'R2:{r2_trn}'),
            gd.ScatterPlot.update(value=df_tst,x='x1',y='y'),
            gd.ScatterPlot.update(value=df_hat_tst,x='x1',y='y'),
            gd.ScatterPlot.update(value=df_corr_tst,x='y',y='yhat'),
            gd.Markdown.update(value=f'R2:{r2_tst}'))
def change_pdp(pdp_x):
    """Re-draw the four data/surrogate scatter plots against variable ``pdp_x``.

    Reads the module-level DataFrames ``df_trn``, ``df_hat_trn``, ``df_tst``
    and ``df_hat_tst`` that ``optimize`` stores.

    Args:
        pdp_x: Column name selected in the dropdown (e.g. ``'x1'``).

    Returns:
        A 4-tuple of plot updates in the order training data, training
        surrogate, testing data, testing surrogate. When there is nothing
        valid to plot (no data generated yet, empty selection, or a selection
        that is not a column of the current data) each entry is a no-op
        update, so the existing plots are left untouched.
    """
    print(pdp_x, type(pdp_x))
    # Look the frames up defensively: they may be unbound or None if the
    # dropdown fires before any model has been fitted.
    module_state = globals()
    frames = [module_state.get(name)
              for name in ('df_trn', 'df_hat_trn', 'df_tst', 'df_hat_tst')]
    missing_data = any(frame is None for frame in frames)
    if missing_data or not pdp_x or pdp_x not in frames[0].columns:
        return tuple(gd.update() for _ in frames)
    return tuple(gd.ScatterPlot.update(value=frame, x=pdp_x, y='y')
                 for frame in frames)
# UI definition. NOTE(review): the nesting below is reconstructed from the
# statement order; confirm the layout against the deployed app.
with gd.Blocks() as demo:
    with gd.Row():
        # Left column: sampling and model-size controls.
        with gd.Column():
            N = gd.Slider(10, 5000, value=1000, step=100, label="Number of samples",
                          info="""
Please specify how many sample points you want to
generate to generate meta-model.
""")
            n = gd.Slider(2, 10, step=1, label="Number of variables",
                          info="""
Please specify how many variables you have
in the optimized function.
""")
            m = gd.Slider(1, 20, step=1, value=3, label="Number of bases",
                          info="""
Please specify how many base functions you use
to construct the meta-model.
""")
            trn_ratio = gd.Slider(0.1, 0.9, value=0.2, step=0.1, label="Training ratio")
            range_min = gd.Slider(-10.0, 10.0, step=0.01, value=-5, label="Minimum value of the range",
                                  info="""
Please choose the lowest value of the range. Make sure the range_min is less than
range_max.
""")
            range_max = gd.Slider(-10.0, 10.0, step=0.01, value=5, label="Maximum value of the range",
                                  info="""
Please choose the highest value of the range. Make sure the range_max is greater than
range_min.
""")
        # Right column: model function editor, run button and presets.
        with gd.Column():
            func_disp = gd.Markdown()
            func_code = gd.Code(
                value="np.sum((10*x-5)**2 - 10*np.cos(2*3.14*(10*x-5)),axis=1,keepdims=True) +10*x.shape[1]",
                language="python",
                lines=1,
                label="Model function: input is x with shape (N,n) and output is y with shape (N,1)",
                interactive=True
            )
            btn = gd.Button('Create FEOM')
            # Each preset row is (code, LaTeX description, range_min, range_max).
            # The LaTeX is written as raw strings: sequences such as \m, \s or
            # \c are not valid escapes in a normal string literal.
            gd.Examples([["np.sum((10*x-5)**2 - 10*np.cos(2*3.14*(10*x-5)),axis=1,keepdims=True) +10*x.shape[1]",
                          r"$$\mathbf{y} = 10n+\sum_{i=1}^n[\mathbf{x}_i^2-10 \cos(2 \pi x_i)] \;\; \mathbf{x}_i \in R^{N}$$",
                          0,1],
                         ["np.sum((x-0.5)**2,axis=1,keepdims=True)",r"$$\mathbf{y} = \sum_i^n(\mathbf{x}_i-0.5)^2,\;\; \mathbf{x}_i \in R^{N}$$",0,1],
                         ["np.sum(x,axis=1,keepdims=True)",r"$$\mathbf{y} = \sum_i^n \mathbf{x}_i \;\; \mathbf{x}_i \in R^{N}$$",0,1],
                         ["np.sum((x-0.5)**3,axis=1,keepdims=True)",r"$$\mathbf{y} = \sum_i^n (\mathbf{x}_i-0.5)^3 \;\; \mathbf{x}_i \in R^{N}$$",0,1]],
                        inputs=[func_code,func_disp,range_min,range_max])
    with gd.Row():
        gd.Markdown('Original Training Data')
        gd.Markdown('HDMR Approximation applied to Training Data')
        r2_out_trn = gd.Markdown('R2 Score on Training')
    with gd.Row():
        pdp_trn = gd.ScatterPlot(label='Scatter plot of input data').style(container=True)
        feom_trn = gd.ScatterPlot(label='Scatter plot of FEOM').style(container=True)
        corr_trn = gd.ScatterPlot(label='y versus feom')
    with gd.Row():
        gd.Markdown('Original Testing Data')
        gd.Markdown('HDMR Approximation applied to Testing Data')
        r2_out_tst = gd.Markdown('R2 Score on Testing')
    with gd.Row():
        pdp_tst = gd.ScatterPlot(label='Scatter plot of input data').style(container=True)
        feom_tst = gd.ScatterPlot(label='Scatter plot of FEOM').style(container=True)
        corr_tst = gd.ScatterPlot(label='y versus feom')
    pdp_x = gd.Dropdown(['x1','x2'], label="Choose input variable to show in scatter plots")
    out = gd.TextArea()

    # Every trigger re-runs the same computation with the same wiring, so the
    # input/output lists are declared once instead of being repeated per event.
    run_inputs = [N, n, m, func_code, trn_ratio, range_min, range_max]
    run_outputs = [out, pdp_x, pdp_trn, feom_trn, corr_trn, r2_out_trn,
                   pdp_tst, feom_tst, corr_tst, r2_out_tst]
    btn.click(optimize, inputs=run_inputs, outputs=run_outputs)
    for control in (n, N, m, trn_ratio, range_min, range_max):
        control.change(optimize, inputs=run_inputs, outputs=run_outputs)

    # Changing the plotted variable only re-draws the four data/surrogate plots.
    pdp_x.change(change_pdp, inputs=pdp_x, outputs=[pdp_trn, feom_trn, pdp_tst, feom_tst])
demo.launch()