{
  "model_type": "dramabox-tts",
  "architecture": "DiT-FlowMatching",
  "base_model": "ltx-2.3-22b-dev-audio-only",
  "parameters": "3.3B",
  "num_layers": 48,
  "audio_inner_dim": 2048,
  "audio_num_attention_heads": 32,
  "audio_attention_head_dim": 64,
  "audio_cross_attention_dim": 2048,
  "denoising_steps": 30,
  "scheduler": "euler_flow_matching",
  "text_encoder": "mlx-community/gemma-3-12b-it-8bit",
  "text_encoder_hidden_size": 3840,
  "ic_lora": {
    "rank": 128,
    "alpha": 128,
    "merged": true,
    "training_version": "v13",
    "text_dropout": 0.4,
    "training_steps": "v12@3000 + v13@1000"
  },
  "audio": {
    "sample_rate": 48000,
    "vae_channels": 8,
    "mel_bins": 16,
    "fps": 25.0
  },
  "inference_defaults": {
    "cfg_scale": 2.5,
    "stg_scale": 1.5,
    "rescale_scale": "auto",
    "modality_scale": 1.0,
    "duration_multiplier": 1.1,
    "seed": 42
  },
  "files": {
    "transformer": "dramabox-dit-v1.safetensors",
    "audio_components": "dramabox-audio-components.safetensors",
    "silence_latent": "assets/silence_latent_frame.pt"
  },
  "mlx_audio": {
    "source_repo": "ResembleAI/Dramabox",
    "weight_format": "split_safetensors",
    "watermarking": "skipped"
  }
}