# Spaces:
# No application file
# No application file
# Experiment configuration: trainer "any-order-flow" on dataset "wikitext2".
#
# NOTE(review): the original indentation was lost (every line was wrapped in
# table-cell markup), so the nesting below is reconstructed from key order.
# Confirm against the consumer's schema, in particular:
#   - insert_schedule / unmask_schedule are assumed to belong to `interpolant`
#   - loss_fn is assumed to belong to `training`
#   - warmup_steps / ema_decay / filter_max_length are assumed to belong to
#     `training` rather than `loss_fn`

trainer: "any-order-flow"
dataset: "wikitext2"

# Model hyperparameters.
model:
  hidden_size: 256
  n_heads: 4
  cond_dim: 64
  dropout: 0.1
  n_blocks: 4

# Interpolant settings. The three null entries are placeholders that are
# populated at runtime rather than set by hand.
interpolant:
  type: "any-order"
  tokens: null  # filled in automatically
  pad_token: null  # filled in automatically
  mask_token: null  # filled in automatically
  max_length: 512
  insert_schedule:
    type: "linear"
  unmask_schedule:
    type: "linear"

# Optimisation, hardware layout and checkpointing.
training:
  batch_size: 512
  # Written with an explicit decimal point: a bare `3e-4` is loaded as the
  # *string* "3e-4" by YAML 1.1 parsers such as PyYAML, while `3.0e-4` is a
  # float everywhere.
  learning_rate: 3.0e-4
  nodes: 2
  devices: 4
  max_steps: 10000
  weight_decay: 0.03
  checkpoint_dir: "checkpoints/wikitext2/any_order"
  save_top_k: 1
  save_every_n_epochs: 1
  # One loss choice per head (unmask / insert).
  loss_fn:
    unmask: "elbo"
    insert: "expectation"
  warmup_steps: 2000
  ema_decay: 0.9999
  filter_max_length: false

# Weights & Biases run identification.
wandb:
  entity: "jaeyeon_kim-harvard-university"
  project: "interpretable-flow"
  name: "wikitext2-any-order"