{
  "model_type": "MGMT2S",
  "preprocess": {
    "hop_size": 480,
    "sample_rate": 24000,
    "n_fft": 1920,
    "num_mels": 128,
    "win_size": 1920,
    "fmin": 0,
    "fmax": 12000,
    "mel_var": 8.14,
    "mel_mean": -4.92,
    "tokenizer_path": "phi"
  },
  "model": {
    "mgmt2s": {
      "hidden_size": 1280,
      "num_layers": 16,
      "num_heads": 16,
      "cfg_scale": 0.15,
      "cond_codebook_size": 16384,
      "cond_dim": 1024,
      "phone_vocab_size": 32100
    }
  }
}