{
  "embed_frequency": 8,
  "embed_include_pi": false,
  "framestep_encoding_strategy": "linear",
  "in_channels": 3,
  "in_extra_channels": 3,
  "latent_channels": 64,
  "num_attention_heads": 8,
  "num_freqs_ts": 128,
  "num_layers": 16,
  "out_dim": 3,
  "prediction_mode": "direct",
  "temporal_context_size": 16,
  "width": 1024
}