# JiT + CFM training (train_cfm_jit.py --config)
# Compact JiT for 32x32 RGB: 2x2 patches, 6 blocks, 512-dim hidden.
sigma: 0.0
# Must match training data; in_channels = dim[0], input_size = dim[1] = dim[2]
dim: [3, 32, 32]
lr: 1.0e-4
weight_decay: 0.0
inference_steps: 50
vis_batch_size: 4
# JiT (jit_model_unconditional.JiT)
# 2x2 patches over a 32x32 input -> (32/2)^2 = 256 tokens; depth 6;
# hidden_size must be divisible by num_heads (512 / 2 = 256 per head).
input_size: 32
patch_size: 2
hidden_size: 512
depth: 6
num_heads: 2
mlp_ratio: 2.0
attn_drop: 0.0
proj_drop: 0.0
bottleneck_dim: 128
in_context_len: 32
in_context_start: 0