---
# Experiment configuration with three sections: model, train, and wandb.
# NOTE(review): the nesting below was reconstructed from a flattened listing
# in which indentation was lost; each key is placed under the most recent
# section header. Confirm against the code that loads this file.

model:
  model_id: "dit"
  width: 24
  height: 24
  T: 1000
  in_channels: 3
  # NOTE(review): n_window equals train.fps * train.duration (30 * 1);
  # presumably these must stay in sync -- confirm.
  n_window: 30
  patch_size: 3
  n_heads: 20
  d_model: 320
  n_blocks: 8
  C: 5000
  bidirectional: false
  nocompile: false
  checkpoint: "experiments/jolly-dust-782"
  rope_type: "rope"
  use_flex: true

train:
  # Two separate learning rates are configured; which parameter groups they
  # apply to is not visible from this file.
  lr1: 0.02
  lr2: 3.0e-4
  betas: [0.9, 0.95]
  weight_decay: 1.0e-5
  max_steps: 2500
  warmup_steps: 100
  batch_size: 64
  noclip: false
  # NOTE(review): the keys from duration through action_dropout may belong
  # at top level rather than under train -- confirm.
  duration: 1
  fps: 30
  debug: false
  dtype: "bf16"
  action_dropout: 0.2

wandb:
  name: null
  project: "toy-wm"
  # NOTE(review): run_name mentions "heads12" and "d384", while
  # model.n_heads is 20 and model.d_model is 320 -- confirm which is intended.
  run_name: "causal-layers8-heads12-d384"