{
  "model_type": "dramabox-tts",
  "architecture": "DiT-FlowMatching",
  "base_model": "ltx-2.3-22b-dev-audio-only",
  "parameters": "3.3B",
  "num_layers": 48,
  "audio_inner_dim": 2048,
  "audio_num_attention_heads": 32,
  "audio_attention_head_dim": 64,
  "audio_cross_attention_dim": 2048,
  "denoising_steps": 30,
  "scheduler": "euler_flow_matching",
  "text_encoder": "google/gemma-3-12b-it-qat-q4_0-unquantized",
  "text_encoder_hidden_size": 3840,
  "ic_lora": {
    "rank": 128,
    "alpha": 128,
    "merged": true,
    "training_version": "v13",
    "text_dropout": 0.4,
    "training_steps": "v12@3000 + v13@1000"
  },
  "audio": {
    "sample_rate": 48000,
    "vae_channels": 8,
    "mel_bins": 16,
    "fps": 25.0
  },
  "inference_defaults": {
    "cfg_scale": 2.5,
    "stg_scale": 1.5,
    "rescale_scale": 0.0,
    "modality_scale": 1.0,
    "duration_multiplier": 1.1,
    "seed": 42
  },
  "files": {
    "transformer": "dramabox-dit-v1.safetensors",
    "audio_components": "dramabox-audio-components.safetensors",
    "silence_latent": "assets/silence_latent_frame.pt"
  }
}