{
  "_class_name": "DiT",
  "_diffusers_version": "0.25.0",
  "_name_or_path": "f5-tts/dit",
  "act_fn": "silu",
  "attention_head_dim": 64,
  "cross_attention_dim": null,
  "dropout": 0.1,
  "ff_mult": 4,
  "in_channels": 100,
  "layers_per_block": 1,
  "num_attention_heads": 8,
  "num_hidden_layers": 8,
  "out_channels": 100,
  "sample_size": 2048,
  "flip_sin_to_cos": false,
  "freq_shift": 0,
  "use_linear_projection": false,
  "projection_class_embeddings_input_dim": null,
  "num_classes": null,
  "time_embedding_type": "fourier",
  "timestep_post_act": "silu",
  "time_cond_proj_dim": null,
  "class_embed_type": null,
  "transformer_in_channels": null
}