{ "embed_frequency": 8, "embed_include_pi": false, "framestep_encoding_strategy": "linear", "in_channels": 3, "in_extra_channels": 3, "latent_channels": 64, "num_attention_heads": 8, "num_freqs_ts": 128, "num_layers": 16, "out_dim": 3, "prediction_mode": "direct", "temporal_context_size": 16, "width": 1024 }