| { |
| "buffer_kwargs": { |
| "batch_dims_test": { |
| "20": { |
| "device": [ |
| 1 |
| ] |
| }, |
| "100": { |
| "device": [ |
| 1 |
| ] |
| } |
| }, |
| "batch_dims_train": { |
| "2": { |
| "device": [ |
| 8, |
| 27 |
| ], |
| "effective": 216 |
| }, |
| "5": { |
| "device": [ |
| 8, |
| 23 |
| ], |
| "effective": 184 |
| }, |
| "10": { |
| "device": [ |
| 8, |
| 19 |
| ], |
| "effective": 152 |
| }, |
| "20": { |
| "device": [ |
| 8, |
| 13 |
| ], |
| "effective": 104 |
| }, |
| "30": { |
| "device": [ |
| 8, |
| 9 |
| ], |
| "effective": 72 |
| }, |
| "40": { |
| "device": [ |
| 8, |
| 7 |
| ], |
| "effective": 56 |
| }, |
| "50": { |
| "device": [ |
| 8, |
| 6 |
| ], |
| "effective": 48 |
| } |
| }, |
| "buffer_class": "FIFOBuffer", |
| "buffer_size": 200, |
| "config": "/cluster/home/llorch/prog/amortized-dibs/experiments/linear-base/train.yaml", |
| "double_cache_train": true, |
| "n_listeners": 8, |
| "n_workers": null, |
| "seed": 0, |
| "train_n_observations_int": 50, |
| "train_n_observations_obs": 150 |
| }, |
| "inference_model_kwargs": { |
| "acyclicity": "dual", |
| "acyclicity_pow_iters": 10, |
| "bernoulli": "sigmoid", |
| "kl_mixt_pen": false, |
| "kl_mixt_wgt": 1.0, |
| "label_smoothing": 0.0, |
| "loss": "xent", |
| "mask_diag": true, |
| "mixture_k": 1, |
| "mixture_net": true, |
| "pos_weight": 1.0, |
| "standardize_v": 0, |
| "train_p_obs_only": 0.5 |
| }, |
| "neural_net_kwargs": { |
| "model_class": "BaseModel", |
| "model_kwargs": { |
| "cosine_sim": true, |
| "cosine_temp_init": 2.0, |
| "dim": 128, |
| "dropout": 0.0, |
| "key_size": 32, |
| "layers": 8, |
| "ln_axis": -1, |
| "mixture_drop": 0.0, |
| "n_mixtures": 1, |
| "num_heads": 8, |
| "out_dim": null, |
| "widening_factor": 4 |
| } |
| }, |
| "train_script_kwargs": { |
| "SAB_num_heads": 8, |
| "acc_grad": null, |
| "activation": "relu", |
| "acyc": "dual", |
| "acyc_burnin": 50000, |
| "acyc_const": 1.0, |
| "acyc_dual_lr": 0.0001, |
| "acyc_inner_step": 500, |
| "acyc_lin": 1.0, |
| "acyc_polyak": 0.0001, |
| "acyc_powit": 10, |
| "acyc_warmup": true, |
| "agg": "max", |
| "batch_n": 3, |
| "bernoulli": "sigmoid", |
| "block": "inter-SAB", |
| "bsu": 300, |
| "buffer_size": 200, |
| "checkpoint": true, |
| "checkpoint_dir": "/cluster/project/infk/krause/llorch/amortibs/checkpoints/experiments/linear-base/ours_01", |
| "checkpoint_every": 10000, |
| "chunk_key_size": 1024, |
| "chunk_query_size": 1024, |
| "config": "/cluster/home/llorch/prog/amortized-dibs/experiments/linear-base/train.yaml", |
| "cosine_sim": true, |
| "cosine_temp_init": 2.0, |
| "cross": "Ndd", |
| "curriculum": "equal-nvars", |
| "descr": "ours_01-cosine-init=2", |
| "detailed_log": false, |
| "dim": 128, |
| "dropout_rate": 0.0, |
| "estimate_eval_memory": false, |
| "eval_every": 15000, |
| "eval_remat": false, |
| "final_init_scaling": false, |
| "grad_clip": true, |
| "grad_clip_value": 1.0, |
| "group_scratch": true, |
| "identity_embedding": "IModule", |
| "ieee": false, |
| "intermediate_ffn": true, |
| "isab_ffn": true, |
| "isab_k": 2, |
| "key_size": 32, |
| "kl_mixt_pen": false, |
| "kl_mixt_wgt": 1.0, |
| "label_smoothing": 0.0, |
| "lim_n_obs": null, |
| "ln_axis": "last", |
| "ln_final": true, |
| "ln_glob": true, |
| "ln_loc": true, |
| "ln_split": true, |
| "log_every": 1000, |
| "long_final": false, |
| "long_init": false, |
| "loss": "xent", |
| "lr": 3e-05, |
| "lr_scaling": "sqrt", |
| "mask_diag": true, |
| "matrix_bias": true, |
| "mem_alloc": null, |
| "mem_check": false, |
| "mem_check_N": 100, |
| "mem_check_d": 100, |
| "mem_check_fwd": true, |
| "mixture_drop": 0.0, |
| "mixture_k": 1, |
| "mixture_net": true, |
| "n_glob": 0, |
| "n_listeners": 8, |
| "n_loc": 8, |
| "n_per_block": 1, |
| "n_split": 0, |
| "n_steps": 300000, |
| "n_workers": null, |
| "nn": "BaseModel", |
| "online": true, |
| "only_eval": false, |
| "optimize_stack": false, |
| "optimizer": "lamb", |
| "out_dim": null, |
| "p_obs_only": 0.5, |
| "p_small_data": 0.0, |
| "p_small_factor": 0.1, |
| "pooling": "max", |
| "pos_wgt": 1.0, |
| "pre_ln": true, |
| "preallocate_gpu": true, |
| "project": "experiment-linear-base", |
| "relation_net": false, |
| "relaunch": true, |
| "relaunch_after": 1380.0, |
| "relaunch_bsub": "bsub -W 23:59 -R \"rusage[mem=3000]\" -R \"rusage[ngpus_excl_p=8]\" -n 128 -R \"span[hosts=1]\" -R \"select[(gpu_model0==NVIDIATITANRTX || gpu_model0==QuadroRTX6000)]\" -J \"linear-base-ours_01-cosine-init=2\" -o \"lsf.o-linear-base-ours_01-cosine-init=2.txt\" ", |
| "scaled_init": false, |
| "scan_eval": false, |
| "scan_eval_size": 500, |
| "schedule": "piecewise_const_200k_300k", |
| "seed": 0, |
| "skip_connection_e": false, |
| "smoke_test": false, |
| "standardize_v": 0, |
| "store_wandb_locally": false, |
| "train_n_int": 50, |
| "train_n_obs": 150, |
| "visualize_data_distribution": false, |
| "visualize_diff": false, |
| "wandb_id": "3v8lk56w", |
| "weight_decay": 0.0, |
| "widening_factor": 4 |
| }, |
| "updater": { |
| "acyclicity_burnin": 50000, |
| "acyclicity_dual_lr": 0.0001, |
| "acyclicity_inner_step": 500, |
| "acyclicity_warmup": true, |
| "local_device_count": 8, |
| "polyak_rate": 0.0001 |
| } |
| } |