{
  "autoencoder": {
    "hidden_dim": 512,
    "n_convs": [
      6,
      6
    ],
    "strides": [
      1,
      2
    ],
    "stride_kernel_sizes": [
      null,
      7
    ],
    "mels": 100,
    "n_q": 8,
    "codebook_size": 512,
    "codebook_dim": 256,
    "semantic_dim": 768
  },
  "flow_matcher": {
    "sigma_min": 0.0001,
    "mels": 100,
    "channels": 256,
    "n_convs": [
      8,
      8,
      8,
      8
    ],
    "strides": [
      1,
      1,
      1,
      2
    ]
  },
  "mel_transform": {
    "sample_rate": 24000,
    "n_fft": 1024,
    "n_mels": 100,
    "hop_length": 256,
    "center": true,
    "power": 1.0
  }
}