### Notebook for generating datasets 

Create datasets using apebench (jax) to be used by torch models for trainings.
Agree on a fixed naming convention.

In [1]:
import apebench
import numpy as np
import jax.numpy as jnp
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt

 from tqdm.autonotebook import tqdm


In [12]:
# data generation configs
spatial_grid_points = 128
spatial_dims = 2
num_train_samples = 100
train_time_steps = 50
num_test_samples = 40
test_time_steps = 200


Data from advection scenario

Advection hard for 2D

In [13]:
# more difficult advection equation
advection2D_hard_scenario = apebench.scenarios.difficulty.Advection(
 
 optim_config="adam;10_000;constant;1e-4",

 report_metrics="mean_nRMSE", # "mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE"
 
 num_points = spatial_grid_points, # discretized points on the spatial 

 num_spatial_dims = spatial_dims, # spatial dimensions 
 
 num_train_samples = num_train_samples, # number of ICs for train set
 
 train_temporal_horizon = train_time_steps, # consecutive time steps the simulator is run

 train_seed = 0,

 num_test_samples = num_test_samples, # number of ICs for test set
 
 test_temporal_horizon = test_time_steps, # consecutive time steps the simulator is run for test set

 test_seed = 773,
 
 advection_gamma=-10.5,
)

In [14]:
train_data = advection2D_hard_scenario.get_train_data()
print('train data shape',train_data.shape)

test_ic_set = advection2D_hard_scenario.get_test_ic_set()
print('test ICs shape',test_ic_set.shape)

full_test_data = advection2D_hard_scenario.get_test_data() 
print('complete test data shape',full_test_data.shape)

val_data = full_test_data[:10,:51,:,:]
print('val data',val_data.shape)

test_data = full_test_data[10:,:,:,:]
print('test data after removing val',test_data.shape)

train data shape (100, 51, 1, 128, 128)
test ICs shape (40, 1, 128, 128)
complete test data shape (40, 201, 1, 128, 128)
val data (10, 51, 1, 128, 128)
test data after removing val (30, 201, 1, 128, 128)


In [15]:
# jnp.save("advection2D_hard_train_data_128x128_100ic_50t.npy", train_data)
# jnp.save("advection2D_hard_val_data_128x128_10ic_50t.npy", val_data)
# jnp.save("advection2D_hard_test_data_128x128_30ic_200t.npy", test_data)


Data from kuramoto sivashinksy scenario

In [16]:
kuramoto2D_scenario = apebench.scenarios.difficulty.KuramotoSivashinsky(
 
 optim_config="adam;10_000;constant;1e-4",
 
 report_metrics="mean_nRMSE", # "mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE"
 
 num_points = spatial_grid_points, # discretized points on the spatial 

 num_spatial_dims = spatial_dims, # spatial dimensions 
 
 num_train_samples = num_train_samples, # number of ICs for train set
 
 train_temporal_horizon = train_time_steps, # consecutive time steps the simulator is run

 train_seed = 0,

 num_test_samples = num_test_samples, # number of ICs for test set
 
 test_temporal_horizon = test_time_steps, # consecutive time steps the simulator is run for test set

 test_seed = 773,
)

In [17]:
# data from kuramoto scenario 
train_data = kuramoto2D_scenario.get_train_data()
print('train data shape',train_data.shape)

test_ic_set = kuramoto2D_scenario.get_test_ic_set()
print('test ICs shape',test_ic_set.shape)

full_test_data = kuramoto2D_scenario.get_test_data() 
print('complete test data shape',full_test_data.shape)

val_data = full_test_data[:10,:51,:,:]
print('val data',val_data.shape)

test_data = full_test_data[10:,:,:,:]
print('test data after removing val',test_data.shape)


train data shape (100, 51, 1, 128, 128)
test ICs shape (40, 1, 128, 128)
complete test data shape (40, 201, 1, 128, 128)
val data (10, 51, 1, 128, 128)
test data after removing val (30, 201, 1, 128, 128)


In [19]:
# jnp.save("kuramoto2D_train_data_128x128_100ic_50t.npy", train_data)
# jnp.save("kuramoto2D_val_data_128x128_10ic_50t.npy", val_data)
# jnp.save("kuramoto2D_test_data_128x128_30ic_200t.npy", test_data)


Data from advection diffusion scenario

In [20]:
advdiff2D_scenario = apebench.scenarios.difficulty.AdvectionDiffusion(
 
 optim_config="adam;10_000;constant;1e-4",

 
 report_metrics="mean_nRMSE", # "mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE"
 
 num_points = spatial_grid_points, # discretized points on the spatial 

 num_spatial_dims = spatial_dims, # spatial dimensions 
 
 num_train_samples = num_train_samples, # number of ICs for train set
 
 train_temporal_horizon = train_time_steps, # consecutive time steps the simulator is run

 train_seed = 0,

 num_test_samples = num_test_samples, # number of ICs for test set
 
 test_temporal_horizon = test_time_steps, # consecutive time steps the simulator is run for test set

 test_seed = 773,
)

In [22]:
# data from advection diffusion
train_data = advdiff2D_scenario.get_train_data()
print('train data shape',train_data.shape)

test_ic_set = advdiff2D_scenario.get_test_ic_set()
print('test ICs shape',test_ic_set.shape)

full_test_data = advdiff2D_scenario.get_test_data() 
print('complete test data shape',full_test_data.shape)

val_data = full_test_data[:10,:51,:,:]
print('val data',val_data.shape)

test_data = full_test_data[10:,:,:,:]
print('test data after removing val',test_data.shape)



train data shape (100, 51, 1, 128, 128)
test ICs shape (40, 1, 128, 128)
complete test data shape (40, 201, 1, 128, 128)
val data (10, 51, 1, 128, 128)
test data after removing val (30, 201, 1, 128, 128)


In [25]:
# jnp.save("advdiff2D_train_data_128x128_100ic_50t.npy", train_data)
# jnp.save("advdiff2D_val_data_128x128_10ic_50t.npy", val_data)
# jnp.save("advdiff2D_test_data_128x128_30ic_200t.npy", test_data)


In [24]:
burgers2D_scenario = apebench.scenarios.difficulty.Burgers(
 
 optim_config="adam;10_000;constant;1e-4",

 report_metrics="mean_nRMSE", # "mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE"
 
 num_points = spatial_grid_points, # discretized points on the spatial 

 num_spatial_dims = spatial_dims, # spatial dimensions 
 
 num_train_samples = num_train_samples, # number of ICs for train set
 
 train_temporal_horizon = train_time_steps, # consecutive time steps the simulator is run

 train_seed = 0,

 num_test_samples = num_test_samples, # number of ICs for test set
 
 test_temporal_horizon = test_time_steps, # consecutive time steps the simulator is run for test set

 test_seed = 773,
 
)

In [26]:
# data from burgers
train_data = burgers2D_scenario.get_train_data()
print('train data shape',train_data.shape)

test_ic_set = burgers2D_scenario.get_test_ic_set()
print('test ICs shape',test_ic_set.shape)

full_test_data = burgers2D_scenario.get_test_data() 
print('complete test data shape',full_test_data.shape)

val_data = full_test_data[:10,:51,:,:]
print('val data',val_data.shape)

test_data = full_test_data[10:,:,:,:]
print('test data after removing val',test_data.shape)



train data shape (100, 51, 2, 128, 128)
test ICs shape (40, 2, 128, 128)
complete test data shape (40, 201, 2, 128, 128)
val data (10, 51, 2, 128, 128)
test data after removing val (30, 201, 2, 128, 128)


In [27]:
# jnp.save("burgers2D_train_data_128x128_100ic_50t.npy", train_data)
# jnp.save("burgers2D_val_data_128x128_10ic_50t.npy", val_data)
# jnp.save("burgers2D_test_data_128x128_30ic_200t.npy", test_data)


In [29]:
fisher2D_scenario = apebench.scenarios.difficulty.FisherKPP(
 
 optim_config="adam;10_000;constant;1e-4",

 
 report_metrics="mean_nRMSE", # "mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE"
 
 num_points = spatial_grid_points, # discretized points on the spatial 

 num_spatial_dims = spatial_dims, # spatial dimensions 
 
 num_train_samples = num_train_samples, # number of ICs for train set
 
 train_temporal_horizon = train_time_steps, # consecutive time steps the simulator is run

 train_seed = 0,

 num_test_samples = num_test_samples, # number of ICs for test set
 
 test_temporal_horizon = test_time_steps, # consecutive time steps the simulator is run for test set

 test_seed = 773,
)

In [30]:
# data from fisher-kpp
train_data = fisher2D_scenario.get_train_data()
print('train data shape',train_data.shape)

test_ic_set = fisher2D_scenario.get_test_ic_set()
print('test ICs shape',test_ic_set.shape)

full_test_data = fisher2D_scenario.get_test_data() 
print('complete test data shape',full_test_data.shape)

val_data = full_test_data[:10,:51,:,:]
print('val data',val_data.shape)

test_data = full_test_data[10:,:,:,:]
print('test data after removing val',test_data.shape)



train data shape (100, 51, 1, 128, 128)
test ICs shape (40, 1, 128, 128)
complete test data shape (40, 201, 1, 128, 128)
val data (10, 51, 1, 128, 128)
test data after removing val (30, 201, 1, 128, 128)


In [31]:
# jnp.save("fisher2D_train_data_128x128_100ic_50t.npy", train_data)
# jnp.save("fisher2D_val_data_128x128_10ic_50t.npy", val_data)
# jnp.save("fisher2D_test_data_128x128_30ic_200t.npy", test_data)


2D Kolmogorov flow. Kolmogorov forcing of Navier stokes:

In [32]:
kolmflow2D_scenario = apebench.scenarios.physical.KolmogorovFlow(
 
 optim_config="adam;10_000;constant;1e-4",

 report_metrics="mean_nRMSE", # "mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE"
 
 num_points = spatial_grid_points, # discretized points on the spatial 

 num_spatial_dims = spatial_dims, # spatial dimensions 
 
 num_train_samples = num_train_samples, # number of ICs for train set
 
 train_temporal_horizon = train_time_steps, # consecutive time steps the simulator is run

 train_seed = 0,

 num_test_samples = num_test_samples, # number of ICs for test set
 
 test_temporal_horizon = test_time_steps, # consecutive time steps the simulator is run for test set

 test_seed = 773,
)

In [33]:
# data from NS-kolm flow
train_data = kolmflow2D_scenario.get_train_data()
print('train data shape',train_data.shape)

test_ic_set = kolmflow2D_scenario.get_test_ic_set()
print('test ICs shape',test_ic_set.shape)

full_test_data = kolmflow2D_scenario.get_test_data() 
print('complete test data shape',full_test_data.shape)

val_data = full_test_data[:10,:51,:,:]
print('val data',val_data.shape)

test_data = full_test_data[10:,:,:,:]
print('test data after removing val',test_data.shape)

train data shape (100, 51, 1, 128, 128)
test ICs shape (40, 1, 128, 128)
complete test data shape (40, 201, 1, 128, 128)
val data (10, 51, 1, 128, 128)
test data after removing val (30, 201, 1, 128, 128)


In [34]:
# jnp.save("kolmflow2D_train_data_128x128_100ic_50t.npy", train_data)
# jnp.save("kolmflow2D/kolmflow2D_val_data_128x128_10ic_50t.npy", val_data)
# jnp.save("kolmflow2D/kolmflow2D_test_data_128x128_30ic_200t.npy", test_data)


In [35]:
grayscott2D_scenario = apebench.scenarios.physical.GrayScott(
 
 optim_config="adam;10_000;constant;1e-4",

 report_metrics="mean_nRMSE", # "mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE"
 
 num_points = spatial_grid_points, # discretized points on the spatial 

 num_spatial_dims = spatial_dims, # spatial dimensions 
 
 num_train_samples = num_train_samples, # number of ICs for train set
 
 train_temporal_horizon = train_time_steps, # consecutive time steps the simulator is run

 train_seed = 0,

 num_test_samples = num_test_samples, # number of ICs for test set
 
 test_temporal_horizon = test_time_steps, # consecutive time steps the simulator is run for test set

 test_seed = 773,
)

In [36]:
# data from gray scott
train_data = grayscott2D_scenario.get_train_data()
print('train data shape',train_data.shape)

test_ic_set = grayscott2D_scenario.get_test_ic_set()
print('test ICs shape',test_ic_set.shape)

full_test_data = grayscott2D_scenario.get_test_data() # [:,1:,:,:] # removing the initial condition from test set
print('complete test data shape',full_test_data.shape)

val_data = full_test_data[:10,:51,:,:]
print('val data',val_data.shape)

test_data = full_test_data[10:,:,:,:]
print('test data after removing val',test_data.shape)



train data shape (100, 51, 2, 128, 128)
test ICs shape (40, 2, 128, 128)
complete test data shape (40, 201, 2, 128, 128)
val data (10, 51, 2, 128, 128)
test data after removing val (30, 201, 2, 128, 128)


In [38]:
# jnp.save("grayscott2D_train_data_128x128_100ic_50t.npy", train_data)
# jnp.save("grayscott2D_val_data_128x128_10ic_50t.npy", val_data)
# jnp.save("grayscott2D_test_data_128x128_30ic_200t.npy", test_data)


In [39]:
dypdiff2D_scenario = apebench.scenarios.difficulty.HyperDiffusion(
 
 optim_config="adam;10_000;constant;1e-4",

 report_metrics="mean_nRMSE", # "mean_nRMSE,mean_fourier_nRMSE;0;5;0,mean_H1_nRMSE"
 
 num_points = spatial_grid_points, # discretized points on the spatial 

 num_spatial_dims = spatial_dims, # spatial dimensions 
 
 num_train_samples = num_train_samples, # number of ICs for train set
 
 train_temporal_horizon = train_time_steps, # consecutive time steps the simulator is run

 train_seed = 0,

 num_test_samples = num_test_samples, # number of ICs for test set
 
 test_temporal_horizon = test_time_steps, # consecutive time steps the simulator is run for test set

 test_seed = 773,
)

In [40]:
# data from hyper diffusion
train_data = dypdiff2D_scenario.get_train_data()
print('train data shape',train_data.shape)

test_ic_set = dypdiff2D_scenario.get_test_ic_set()
print('test ICs shape',test_ic_set.shape)

full_test_data = dypdiff2D_scenario.get_test_data() # [:,1:,:,:] # removing the initial condition from test set
print('complete test data shape',test_data.shape)

val_data = full_test_data[:10,:51,:,:]
print('val data',val_data.shape)

test_data = full_test_data[10:,:,:,:]
print('test data after removing val',test_data.shape)

train data shape (100, 51, 1, 128, 128)
test ICs shape (40, 1, 128, 128)
complete test data shape (30, 201, 2, 128, 128)
val data (10, 51, 1, 128, 128)
test data after removing val (30, 201, 1, 128, 128)


In [41]:
# jnp.save("dypdiff2D_train_data_128x128_100ic_50t.npy", train_data)
# jnp.save("dypdiff2D_val_data_128x128_10ic_50t.npy", val_data)
# jnp.save("dypdiff2D_test_data_128x128_30ic_200t.npy", test_data)
