{ "latent_channels": 16, "cond_dim": 128, "num_blocks": 2, "num_heads": 2, "head_dim": 32, "T_inner": 2, "T_outer": 1, "ffn_mult": 2.0, "dropout": 0.0, "use_ift": false, "encoder_base_ch": 32, "decoder_base_ch": 64, "vocab_size": 32000, "max_text_length": 77, "text_layers": 2, "text_heads": 2 }