avici / neurips-linear /kwargs.json
larslorch's picture
Upload 6 files
ed55867
{
"buffer_kwargs": {
"batch_dims_test": {
"20": {
"device": [
1
]
},
"100": {
"device": [
1
]
}
},
"batch_dims_train": {
"2": {
"device": [
8,
27
],
"effective": 216
},
"5": {
"device": [
8,
23
],
"effective": 184
},
"10": {
"device": [
8,
19
],
"effective": 152
},
"20": {
"device": [
8,
13
],
"effective": 104
},
"30": {
"device": [
8,
9
],
"effective": 72
},
"40": {
"device": [
8,
7
],
"effective": 56
},
"50": {
"device": [
8,
6
],
"effective": 48
}
},
"buffer_class": "FIFOBuffer",
"buffer_size": 200,
"config": "/cluster/home/llorch/prog/amortized-dibs/experiments/linear-base/train.yaml",
"double_cache_train": true,
"n_listeners": 8,
"n_workers": null,
"seed": 0,
"train_n_observations_int": 50,
"train_n_observations_obs": 150
},
"inference_model_kwargs": {
"acyclicity": "dual",
"acyclicity_pow_iters": 10,
"bernoulli": "sigmoid",
"kl_mixt_pen": false,
"kl_mixt_wgt": 1.0,
"label_smoothing": 0.0,
"loss": "xent",
"mask_diag": true,
"mixture_k": 1,
"mixture_net": true,
"pos_weight": 1.0,
"standardize_v": 0,
"train_p_obs_only": 0.5
},
"neural_net_kwargs": {
"model_class": "BaseModel",
"model_kwargs": {
"cosine_sim": true,
"cosine_temp_init": 2.0,
"dim": 128,
"dropout": 0.0,
"key_size": 32,
"layers": 8,
"ln_axis": -1,
"mixture_drop": 0.0,
"n_mixtures": 1,
"num_heads": 8,
"out_dim": null,
"widening_factor": 4
}
},
"train_script_kwargs": {
"SAB_num_heads": 8,
"acc_grad": null,
"activation": "relu",
"acyc": "dual",
"acyc_burnin": 50000,
"acyc_const": 1.0,
"acyc_dual_lr": 0.0001,
"acyc_inner_step": 500,
"acyc_lin": 1.0,
"acyc_polyak": 0.0001,
"acyc_powit": 10,
"acyc_warmup": true,
"agg": "max",
"batch_n": 3,
"bernoulli": "sigmoid",
"block": "inter-SAB",
"bsu": 300,
"buffer_size": 200,
"checkpoint": true,
"checkpoint_dir": "/cluster/project/infk/krause/llorch/amortibs/checkpoints/experiments/linear-base/ours_01",
"checkpoint_every": 10000,
"chunk_key_size": 1024,
"chunk_query_size": 1024,
"config": "/cluster/home/llorch/prog/amortized-dibs/experiments/linear-base/train.yaml",
"cosine_sim": true,
"cosine_temp_init": 2.0,
"cross": "Ndd",
"curriculum": "equal-nvars",
"descr": "ours_01-cosine-init=2",
"detailed_log": false,
"dim": 128,
"dropout_rate": 0.0,
"estimate_eval_memory": false,
"eval_every": 15000,
"eval_remat": false,
"final_init_scaling": false,
"grad_clip": true,
"grad_clip_value": 1.0,
"group_scratch": true,
"identity_embedding": "IModule",
"ieee": false,
"intermediate_ffn": true,
"isab_ffn": true,
"isab_k": 2,
"key_size": 32,
"kl_mixt_pen": false,
"kl_mixt_wgt": 1.0,
"label_smoothing": 0.0,
"lim_n_obs": null,
"ln_axis": "last",
"ln_final": true,
"ln_glob": true,
"ln_loc": true,
"ln_split": true,
"log_every": 1000,
"long_final": false,
"long_init": false,
"loss": "xent",
"lr": 3e-05,
"lr_scaling": "sqrt",
"mask_diag": true,
"matrix_bias": true,
"mem_alloc": null,
"mem_check": false,
"mem_check_N": 100,
"mem_check_d": 100,
"mem_check_fwd": true,
"mixture_drop": 0.0,
"mixture_k": 1,
"mixture_net": true,
"n_glob": 0,
"n_listeners": 8,
"n_loc": 8,
"n_per_block": 1,
"n_split": 0,
"n_steps": 300000,
"n_workers": null,
"nn": "BaseModel",
"online": true,
"only_eval": false,
"optimize_stack": false,
"optimizer": "lamb",
"out_dim": null,
"p_obs_only": 0.5,
"p_small_data": 0.0,
"p_small_factor": 0.1,
"pooling": "max",
"pos_wgt": 1.0,
"pre_ln": true,
"preallocate_gpu": true,
"project": "experiment-linear-base",
"relation_net": false,
"relaunch": true,
"relaunch_after": 1380.0,
"relaunch_bsub": "bsub -W 23:59 -R \"rusage[mem=3000]\" -R \"rusage[ngpus_excl_p=8]\" -n 128 -R \"span[hosts=1]\" -R \"select[(gpu_model0==NVIDIATITANRTX || gpu_model0==QuadroRTX6000)]\" -J \"linear-base-ours_01-cosine-init=2\" -o \"lsf.o-linear-base-ours_01-cosine-init=2.txt\" ",
"scaled_init": false,
"scan_eval": false,
"scan_eval_size": 500,
"schedule": "piecewise_const_200k_300k",
"seed": 0,
"skip_connection_e": false,
"smoke_test": false,
"standardize_v": 0,
"store_wandb_locally": false,
"train_n_int": 50,
"train_n_obs": 150,
"visualize_data_distribution": false,
"visualize_diff": false,
"wandb_id": "3v8lk56w",
"weight_decay": 0.0,
"widening_factor": 4
},
"updater": {
"acyclicity_burnin": 50000,
"acyclicity_dual_lr": 0.0001,
"acyclicity_inner_step": 500,
"acyclicity_warmup": true,
"local_device_count": 8,
"polyak_rate": 0.0001
}
}