{ "_class_name": "DiT", "_diffusers_version": "0.25.0", "_name_or_path": "f5-tts/dit", "act_fn": "silu", "attention_head_dim": 64, "cross_attention_dim": null, "dropout": 0.1, "ff_mult": 4, "in_channels": 100, "layers_per_block": 1, "num_attention_heads": 8, "num_hidden_layers": 8, "out_channels": 100, "sample_size": 2048, "flip_sin_to_cos": false, "freq_shift": 0, "use_linear_projection": false, "projection_class_embeddings_input_dim": null, "num_classes": null, "time_embedding_type": "fourier", "timestep_post_act": "silu", "time_cond_proj_dim": null, "class_embed_type": null, "transformer_in_channels": null }