{
  "d_model": 1024,
  "max_text_len": 32,
  "batch_size": 256,
  "eval_batch_size": 512,
  "train_num_workers": 4,
  "eval_num_workers": 2,
  "epochs": 3,
  "lr": 0.0003,
  "weight_decay": 0.01,
  "min_lr": 1e-06,
  "grad_clip": 1.0,
  "align_samples": 5000,
  "whiten_procrustes": true,
  "enforce_rotation_only": false,
  "pw": 0.1,
  "aw": 0.05,
  "checkpoint_dir": "/home/claude/bertenstein_checkpoints",
  "tensorboard_dir": "/home/claude/bertenstein_tb",
  "save_every_epoch": true,
  "log_every_n_steps": 10
}